diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index b03dec9ba4..8ec0a5689d 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -52,12 +52,10 @@ set(TARGET common)
 
 add_library(${TARGET} STATIC
     base64.hpp
-    chat.cpp
-    chat.h
-    chat-parser.cpp
-    chat-parser.h
-    chat-parser-xml-toolcall.h
-    chat-parser-xml-toolcall.cpp
+    chat-auto-parser-generator.cpp
+    chat-auto-parser-helpers.cpp
+    chat-auto-parser.h
+    chat-diff-analyzer.cpp
 	chat-peg-parser.cpp
     chat-peg-parser.h
     common.cpp
@@ -77,9 +75,9 @@ add_library(${TARGET} STATIC
     ngram-cache.h
     ngram-map.cpp
     ngram-map.h
-	peg-parser.cpp
+    peg-parser.cpp
     peg-parser.h
-	speculative.cpp
+    speculative.cpp
     spec-tuner.cpp
     spec-tuner.h
     unicode.cpp
@@ -88,6 +86,10 @@ add_library(${TARGET} STATIC
     ngram-mod.h
     regex-partial.cpp
     regex-partial.h
+    reasoning-budget.cpp
+    reasoning-budget.h
+    chat.cpp
+    chat.h
     jinja/lexer.cpp
     jinja/lexer.h
     jinja/parser.cpp
diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
new file mode 100644
index 0000000000..3eb1fa9a9c
--- /dev/null
+++ b/common/chat-auto-parser-generator.cpp
@@ -0,0 +1,469 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "json-schema-to-grammar.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <stdexcept>
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+// Helper to iterate over tools/functions
+static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
+    for (const auto & tool : tools) {
+        if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
+            continue;
+        }
+        fn(tool);
+    }
+}
+
+namespace autoparser {
+
+parser_build_context::parser_build_context(common_chat_peg_builder & p, const generation_params & inputs) :
+    p(p),
+    inputs(inputs),
+    reasoning_parser(p.eps()) {}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
+                                                  const struct generation_params & inputs) {
+    // Run differential analysis to extract template structure
+    struct autoparser autoparser;
+    autoparser.analyze_template(tmpl);
+    return generate_parser(tmpl, inputs, autoparser);
+}
+
+common_chat_params peg_generator::generate_parser(const common_chat_template &    tmpl,
+                                                  const struct generation_params & inputs,
+                                                  const autoparser &              autoparser) {
+    // Create the result structure
+    common_chat_params data;
+    data.prompt           = common_chat_template_direct_apply(tmpl, inputs);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = autoparser.preserved_tokens;
+
+    auto parser = autoparser.build_parser(inputs);
+    data.parser = parser.save();
+
+    // Build grammar if tools are present
+    bool has_tools =
+        autoparser.tools.format.mode != tool_format::NONE && inputs.tools.is_array() && !inputs.tools.empty();
+    std::string trigger_marker = !autoparser.tools.format.section_start.empty() ? autoparser.tools.format.section_start :
+                                                                                  autoparser.tools.format.per_call_start;
+
+    bool has_response_format = !inputs.json_schema.empty() && inputs.json_schema.is_object();
+    bool include_grammar = has_response_format || (has_tools &&
+            ((inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO && !trigger_marker.empty()) ||
+              inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
+
+    if (include_grammar) {
+        data.grammar_lazy = !has_response_format && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.contains("parameters") ? function.at("parameters") : json::object();
+                builder.resolve_refs(schema);
+            });
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
+            }
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+
+        // Set grammar triggers based on tool section markers (fall back to per-call markers)
+        if (data.grammar_lazy) {
+            data.grammar_triggers = {
+                { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
+            };
+        }
+    }
+
+    return data;
+}
+
+common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
+    if (!analysis_complete) {
+        throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
+    }
+    return build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        parser_build_context ctx(p, inputs);
+        bool                 extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+
+        ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
+        ctx.content              = &content;
+        ctx.reasoning            = &reasoning;
+
+        // Build reasoning parser
+        ctx.reasoning_parser = reasoning.build_parser(ctx);
+
+        auto parser = p.eps();
+
+        bool has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+        bool has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+        bool pure_content        = reasoning.mode == reasoning_mode::NONE;
+
+        if (has_response_format) {
+            auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
+            parser = ctx.reasoning_parser + p.space() + p.choice({
+                p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
+                response_format
+            }) + p.end();
+            pure_content = false;
+        } else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
+            parser = tools.build_parser(ctx);
+            pure_content = false;
+        } else {
+            parser = content.build_parser(ctx);
+        }
+        return pure_content ? p.prefix(inputs.generation_prompt, reasoning.start) + parser : p.prefix(inputs.generation_prompt, reasoning.start) << parser;
+    });
+}
+
+common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (!ctx.extracting_reasoning) {
+        return p.eps();
+    }
+
+    if (mode == reasoning_mode::TAG_BASED || mode == reasoning_mode::TOOLS_ONLY) {
+        if (!end.empty()) {
+            if (!start.empty()) {
+                // Standard tag-based: optional(<think>reasoning</think>)
+                return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
+            }
+            // Delimiter-style (empty start)
+            return p.optional(p.reasoning(p.until(end)) + end + p.space());
+        }
+    }
+
+    return p.eps();
+}
+
+common_peg_parser analyze_content::build_parser(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (is_always_wrapped()) {
+        if (ctx.extracting_reasoning) {
+            return ctx.reasoning_parser + start + p.content(p.until(end)) + end + p.end();
+        }
+        return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
+    }
+    return ctx.reasoning_parser + p.content(p.rest()) + p.end();
+}
+
+common_peg_parser analyze_content::build_optional_wrapped(parser_build_context & ctx) const {
+    auto & p = ctx.p;
+
+    if (is_always_wrapped()) {
+        return p.optional(start + p.content(p.until(end)) + end);
+    }
+    return p.eps();
+}
+
+common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const {
+    switch (format.mode) {
+        case tool_format::JSON_NATIVE:
+            return build_tool_parser_json_native(ctx);
+        case tool_format::TAG_WITH_JSON:
+            return build_tool_parser_tag_json(ctx);
+        case tool_format::TAG_WITH_TAGGED:
+            return build_tool_parser_tag_tagged(ctx);
+        default:
+            LOG_ERR("[ERROR] Template seems to support tool calls, but failed to determine tool format. Tool calling will not work properly. "
+                "Check for a fixed template for your model in the models/templates directory of your llama.cpp installation or "
+                "report an issue at https://github.com/ggml-org/llama.cpp/issues\n");
+            return ctx.p.eps();
+    }
+}
+
+common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    // Build effective field names with dot notation if function_field is set
+    std::string name_field = format.name_field;
+    std::string args_field = format.args_field;
+
+    if (!format.function_field.empty() && format.function_field != "function" &&
+        name_field.find('.') == std::string::npos) {
+        name_field = format.function_field + "." + name_field;
+        args_field = format.function_field + "." + args_field;
+    }
+
+    auto tools_parser = p.standard_json_tools(
+        format.section_start, format.section_end, inputs.tools, inputs.parallel_tool_calls,
+        inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
+        format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
+
+    // Handle content wrappers if present
+    if (ctx.content && ctx.content->is_always_wrapped()) {
+        auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
+        return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
+    }
+
+    std::string tool_start = "{";
+    if (!format.section_start.empty()) {
+        tool_start = format.section_start;
+    } else if (!format.per_call_start.empty()) {
+        tool_start = format.per_call_start;
+    }
+
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser +
+           p.end();
+}
+
+common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
+                                                    const common_peg_parser & call_id_section, bool have_call_id,
+                                                    const common_peg_parser & args,
+                                                    std::optional<common_peg_parser> atomic_peek) const {
+    auto              open           = p.tool_open(function.name_prefix + p.tool_name(p.literal(name)) + function.name_suffix);
+    bool              matched_atomic = false;
+    common_peg_parser func_parser    = p.eps();
+
+    if (!function.name_suffix.empty()) {
+        func_parser    = open + call_id_section + p.space() + args;
+        matched_atomic = true;
+    } else if (have_call_id) {
+        func_parser    = p.atomic(open + call_id_section) + p.space() + args;
+        matched_atomic = true;
+    } else if (atomic_peek.has_value()) {
+        func_parser    = p.atomic(open + call_id_section + p.space() + *atomic_peek) + args;
+        matched_atomic = true;
+    } else {
+        func_parser = open + call_id_section + p.space() + args;
+    }
+
+    if (!function.close.empty()) {
+        func_parser = func_parser + p.space() + p.tool_close(p.literal(function.close));
+    } else if (!format.per_call_end.empty()) {
+        // When there's no func_close but there is a per_call_end marker, use peek() to ensure
+        // we only emit tool_close when we can actually see the closing marker. This prevents
+        // premature closing during partial parsing when we've seen e.g. "</" which could be
+        // either "</tool_call>" (end) or "<arg_key>" prefix that failed to match.
+        func_parser = func_parser + p.tool_close(p.peek(p.literal(format.per_call_end)));
+    } else {
+        func_parser = func_parser + p.tool_close(p.space());  // force this to process tool closing callbacks in mapper
+    }
+    if (!matched_atomic) {
+        func_parser = p.atomic(func_parser);
+    }
+    return func_parser;
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_choice = p.choice();
+
+    foreach_function(inputs.tools, [&](const json & tool) {
+        const auto & func   = tool.at("function");
+        std::string  name   = func.at("name");
+        const auto & schema = func.contains("parameters") ? func.at("parameters") : json::object();
+
+        // Build call_id parser based on position (if supported)
+        bool have_call_id = false;
+        common_peg_parser call_id_section = p.eps();
+        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+            (!call_id.suffix.empty() || !arguments.start.empty())) {
+            if (!call_id.suffix.empty()) {
+                call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix))) + call_id.suffix;
+            } else {
+                call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(arguments.start)));
+            }
+            have_call_id = true;
+        }
+        auto args_parser = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema));
+        if (!arguments.start.empty()) {
+            args_parser = p.literal(arguments.start) + args_parser;
+        }
+        if (!arguments.end.empty()) {
+            args_parser = args_parser + p.literal(arguments.end);
+        }
+
+        auto atomic_peek = !arguments.start.empty() ? std::optional(p.peek(p.literal(arguments.start))) : std::nullopt;
+        auto func_parser = build_func_parser(p, name, call_id_section, have_call_id, args_parser, atomic_peek);
+        tool_choice |= p.rule("tool-" + name, func_parser);
+    });
+
+    auto require_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_calls = p.eps();
+
+    if (!format.per_call_start.empty()) {
+        auto wrapped_call = format.per_call_start + tool_choice + format.per_call_end;
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+        } else {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call);
+        }
+        if (!format.section_start.empty()) {
+            tool_calls = p.trigger_rule("tool-calls",
+                                        p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+                                            (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+        }
+    } else {
+        std::string separator = ", ";  // Default
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice +
+                                                         p.zero_or_more(separator + tool_choice) + format.section_end);
+        } else {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + tool_choice + format.section_end);
+        }
+    }
+
+    if (!require_calls) {
+        tool_calls = p.optional(tool_calls);
+    }
+
+    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
+    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+           p.end();
+}
+
+common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
+    auto &       p           = ctx.p;
+    const auto & inputs      = ctx.inputs;
+    bool         force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix));
+
+    common_peg_parser tool_choice = p.choice();
+
+    foreach_function(inputs.tools, [&](const json & tool) {
+        const auto &          func       = tool.at("function");
+        std::string           name       = func.at("name");
+        auto                  params     = func.contains("parameters") ? func.at("parameters") : json::object();
+        const auto &          properties = params.contains("properties") ? params.at("properties") : json::object();
+
+        std::set<std::string> required;
+        if (params.contains("required")) {
+            params.at("required").get_to(required);
+        }
+
+        auto schema_info = common_schema_info();
+        schema_info.resolve_refs(params);
+
+        // Build parser for each argument, separating required and optional
+        std::vector<common_peg_parser> required_parsers;
+        std::vector<common_peg_parser> optional_parsers;
+        for (const auto & [param_name, param_schema] : properties.items()) {
+            bool is_required = required.find(param_name) != required.end();
+
+            auto arg =
+                p.tool_arg(p.tool_arg_open(arguments.name_prefix + p.tool_arg_name(p.literal(param_name)) +
+                                           arguments.name_suffix) +
+                           arguments.value_prefix +
+                           (schema_info.resolves_to_string(param_schema) ?
+                                p.tool_arg_string_value(p.schema(until_suffix,
+                                                                 "tool-" + name + "-arg-" + param_name + "-schema",
+                                                                 param_schema, true)) :
+                                p.tool_arg_json_value(p.schema(
+                                    p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
+                                    p.space()) +
+                           p.tool_arg_close(p.literal(arguments.value_suffix)));
+
+            auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
+            if (is_required) {
+                required_parsers.push_back(named_arg);
+            } else {
+                optional_parsers.push_back(named_arg);
+            }
+        }
+
+        // Build required arg sequence in definition order
+        common_peg_parser args_seq = p.eps();
+        for (size_t i = 0; i < required_parsers.size(); i++) {
+            if (i > 0) {
+                args_seq = args_seq + p.space();
+            }
+            args_seq = args_seq + required_parsers[i];
+        }
+
+        // Build optional args with flexible ordering
+        if (!optional_parsers.empty()) {
+            common_peg_parser any_opt = p.choice();
+            for (const auto & opt : optional_parsers) {
+                any_opt |= opt;
+            }
+            args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
+        }
+
+        if (!arguments.start.empty()) {
+            args_seq = p.literal(arguments.start) + args_seq;
+        }
+        if (!arguments.end.empty()) {
+            args_seq = args_seq + p.literal(arguments.end);
+        }
+
+        // Build call_id parser based on position (if supported)
+        common_peg_parser call_id_section = p.eps();
+        bool have_call_id = false;
+        if (call_id.pos == call_id_position::BETWEEN_FUNC_AND_ARGS && !call_id.prefix.empty() &&
+            (!call_id.suffix.empty() || !arguments.start.empty())) {
+            have_call_id = true;
+            if (!call_id.suffix.empty()) {
+                call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(call_id.suffix)) + call_id.suffix);
+            } else {
+                call_id_section = p.optional(call_id.prefix + p.tool_id(p.until(arguments.start)));
+            }
+        }
+
+        // Only peek for an arg tag when there are required args that must follow.
+        // When all args are optional, the model may emit no arg tags at all (#20650).
+        auto atomic_peek = (!arguments.name_prefix.empty() && !required_parsers.empty()) ?
+            std::optional(p.peek(p.literal(arguments.name_prefix))) : std::nullopt;
+        auto func_parser = build_func_parser(p, name, call_id_section, have_call_id, args_seq, atomic_peek);
+        tool_choice |= p.rule("tool-" + name, func_parser);
+    });
+
+    auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+    common_peg_parser tool_calls = p.eps();
+
+    if (!format.per_call_start.empty()) {
+        auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call));
+        } else {
+            tool_calls = p.trigger_rule("tool-call", wrapped_call);
+        }
+        if (!format.section_start.empty()) {
+            tool_calls = p.trigger_rule("tool-calls",
+                                        p.literal(format.section_start) + p.space() + tool_calls + p.space() +
+                                            (format.section_end.empty() ? p.end() : p.literal(format.section_end)));
+        }
+    } else {
+        std::string separator = ", ";  // Default
+
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call", format.section_start + p.space() + tool_choice +
+                                                         p.zero_or_more(separator + tool_choice) + p.space() +
+                                                         format.section_end);
+        } else {
+            tool_calls = p.trigger_rule(
+                "tool-call", format.section_start + p.space() + tool_choice + p.space() + format.section_end);
+        }
+    }
+
+    if (!require_tools) {
+        tool_calls = p.optional(tool_calls);
+    }
+
+    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
+    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
+    return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
+           p.end();
+}
+
+}  // namespace autoparser
diff --git a/common/chat-auto-parser-helpers.cpp b/common/chat-auto-parser-helpers.cpp
new file mode 100644
index 0000000000..2499464cd8
--- /dev/null
+++ b/common/chat-auto-parser-helpers.cpp
@@ -0,0 +1,364 @@
+#include "chat-auto-parser-helpers.h"
+
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <cctype>
+#include <numeric>
+
+using json = nlohmann::ordered_json;
+
+std::string trim_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    if (start == str.length()) {
+        return "";
+    }
+
+    size_t end = str.length() - 1;
+    while (end > start && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+
+    return str.substr(start, end - start + 1);
+}
+
+std::string trim_leading_whitespace(const std::string & str) {
+    size_t start = 0;
+    while (start < str.length() && std::isspace(static_cast<unsigned char>(str[start]))) {
+        start++;
+    }
+
+    return str.substr(start);
+}
+
+std::string trim_trailing_whitespace(const std::string & str) {
+    if (str.empty()) {
+        return "";
+    }
+
+    size_t end = str.length() - 1;
+    while (end > 0 && std::isspace(static_cast<unsigned char>(str[end]))) {
+        end--;
+    }
+
+    // If first char is also whitespace, return empty string
+    if (end == 0 && std::isspace(static_cast<unsigned char>(str[0]))) {
+        return "";
+    }
+
+    return str.substr(0, end + 1);
+}
+
+std::string trim_trailing_newlines(const std::string & str) {
+    size_t end = str.length();
+    while (end > 0 && str[end - 1] == '\n') {
+        end--;
+    }
+
+    return str.substr(0, end);
+}
+
+static size_t common_prefix_len(const std::string & left, const std::string & right) {
+    size_t prefix_len = 0;
+    size_t min_len    = std::min(left.length(), right.length());
+    while (prefix_len < min_len && left[prefix_len] == right[prefix_len]) {
+        prefix_len++;
+    }
+    return prefix_len;
+}
+
+static size_t common_suffix_len(const std::string & left, const std::string & right) {
+    size_t suffix_len = 0;
+    size_t min_len    = std::min(left.length(), right.length());
+    while (suffix_len < min_len && left[left.length() - 1 - suffix_len] == right[right.length() - 1 - suffix_len]) {
+        suffix_len++;
+    }
+    return suffix_len;
+}
+
+diff_split calculate_diff_split(const std::string & left, const std::string & right) {
+    diff_split result;
+
+    auto left_seg = segmentize_markers(left);
+    auto right_seg = segmentize_markers(right);
+
+    if (left_seg.empty()) {
+        result.right = right;
+        return result;
+    }
+    if (right_seg.empty()) {
+        result.left = left;
+        return result;
+    }
+
+    auto left_start = left_seg.begin();
+    auto left_end = --left_seg.end();
+    auto right_start = right_seg.begin();
+    auto right_end = --right_seg.end();
+
+    auto test = [&] () {
+        return left_start != left_end && right_start != right_end;
+    };
+
+    bool left_fully_consumed = false;
+    bool right_fully_consumed = false;
+
+    while (test()) {
+        bool advanced = false;
+        if (*left_start == *right_start) {
+            result.prefix.append(left_start->value);
+            left_start++;
+            right_start++;
+            advanced = true;
+        }
+        if (*left_end == *right_end) {
+            result.suffix = left_end->value + result.suffix;
+            if (left_start != left_end) {
+                left_end--;
+            } else {
+                left_fully_consumed = true;
+            }
+            if (right_start != right_end) {
+                right_end--;
+            } else {
+                right_fully_consumed = true;
+            }
+            advanced = true;
+        }
+        if (!advanced) {
+            break;
+        }
+    }
+
+    if (left_start == left_end && right_start != right_end) {
+        if (*left_start == *right_end) {
+            result.suffix = right_end->value + result.suffix;
+            right_end--;
+            left_fully_consumed = true;
+        } else if (*left_start == *right_start) {
+            result.prefix.append(right_start->value);
+            right_start++;
+            left_fully_consumed = true;
+        }
+    } else if (right_start == right_end && left_start != left_end) {
+        if (*left_end == *right_start) {
+            result.suffix = left_end->value + result.suffix;
+            left_end--;
+            right_fully_consumed = true;
+        } else if (*left_start == *right_start) {
+            result.prefix.append(left_start->value);
+            left_start++;
+            right_fully_consumed = true;
+        }
+    } else if (left_start == left_end && right_start == right_end && *left_start == *right_start && left_start->type == segment_type::MARKER) {
+        result.prefix.append(right_start->value);
+        left_fully_consumed = true;
+        right_fully_consumed = true;
+    }
+
+    auto eat_segment = [](std::string str, const segment & seg) -> std::string { return std::move(str) + seg.value; };
+
+    bool can_have_text_suffix = left_end->type == segment_type::TEXT && right_end->type == segment_type::TEXT;
+    bool can_have_text_prefix = right_start->type == segment_type::TEXT && left_start->type == segment_type::TEXT;
+
+    std::string remainder_left = std::accumulate(left_start, left_fully_consumed ? left_end : ++left_end, std::string(), eat_segment);
+    std::string remainder_right = std::accumulate(right_start, right_fully_consumed ? right_end : ++right_end, std::string(), eat_segment);
+
+    size_t suffix_len = can_have_text_suffix ? common_suffix_len(remainder_left, remainder_right) : 0;
+    // avoid overlaps between prefix and suffix
+    size_t prefix_len = can_have_text_prefix ? common_prefix_len(remainder_left.substr(0, remainder_left.size() - suffix_len),
+        remainder_right.substr(0, remainder_right.size() - suffix_len)) : 0;
+
+    result.prefix.append(remainder_left.substr(0, prefix_len));
+    result.suffix = remainder_left.substr(remainder_left.length() - suffix_len, suffix_len) + result.suffix;
+    result.left = remainder_left.substr(prefix_len, remainder_left.length() - prefix_len - suffix_len);
+    result.right = remainder_right.substr(prefix_len, remainder_right.length() - prefix_len - suffix_len);
+
+    if (result.left == "" && result.right == "") {
+        // degenerate case, no diff
+        result.prefix = left;
+        result.suffix = "";
+        // pick prefix = all as representation
+    }
+
+    // When left has no unique content (result.left is empty), left is entirely
+    // shared with right. The simultaneous prefix/suffix segment matching can
+    // incorrectly consume trailing segments of left as suffix when those same
+    // segments also appear at the end of right (e.g. "\n" at the end of both
+    // the shared content and the generation prompt). This rotates the diff.
+    // Fix: if left is a prefix of right, enforce that directly.
+    if (result.left.empty() && !result.right.empty() &&
+            left.size() <= right.size() &&
+            right.substr(0, left.size()) == left) {
+        result.prefix = left;
+        result.suffix = "";
+        result.right  = right.substr(left.size());
+    }
+
+    return result;
+}
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common prefix of left and right
+    size_t common_prefix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_prefix_len < min_len && left[common_prefix_len] == right[common_prefix_len]) {
+        common_prefix_len++;
+    }
+
+    // If there's no common prefix, return empty string
+    if (common_prefix_len == 0) {
+        return "";
+    }
+
+    // Find the common prefix in the full string
+    std::string common_prefix = left.substr(0, common_prefix_len);
+    size_t      pos           = full.find(common_prefix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything before the common prefix
+    return full.substr(0, pos);
+}
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right) {
+    // Find the common suffix of left and right (compare from the end)
+    size_t common_suffix_len = 0;
+    size_t min_len           = std::min(left.length(), right.length());
+    while (common_suffix_len < min_len &&
+           left[left.length() - 1 - common_suffix_len] == right[right.length() - 1 - common_suffix_len]) {
+        common_suffix_len++;
+    }
+
+    // If there's no common suffix, return empty string
+    if (common_suffix_len == 0) {
+        return "";
+    }
+
+    // Extract the common suffix
+    std::string common_suffix = left.substr(left.length() - common_suffix_len);
+
+    // Find the last occurrence of the common suffix in the full string
+    size_t pos = full.rfind(common_suffix);
+
+    // If not found, return empty string
+    if (pos == std::string::npos) {
+        return "";
+    }
+
+    // Return everything after the common suffix
+    return full.substr(pos + common_suffix_len);
+}
+
+// TODO: segmentize will treat a JSON array inside tags as a tag: <calls>[{ "fun": { ... } }]</calls> will be three markers
+// not too worried about that because it hasn't turned out as a problem anywhere, but noting here in case it will
+// Might have to put some restrictions on tag contents as well (like "no { }")
+std::vector<segment> segmentize_markers(const std::string & text) {
+    std::vector<segment> retval;
+    bool in_marker = false;
+    char marker_opener = '\0';
+
+    auto is_marker_opener = [](char c) -> bool { return c == '<' || c == '['; };
+    auto is_marker_closer = [](char op, char c) -> bool { return (op == '<' && c == '>') || (op == '[' && c == ']'); };
+
+    size_t last_border = 0;
+
+    for (size_t cur_pos = 0; cur_pos < text.length(); cur_pos++) {
+        if (!in_marker && is_marker_opener(text[cur_pos])) {
+            if (last_border < cur_pos) {
+                retval.push_back(segment(segment_type::TEXT, text.substr(last_border, cur_pos - last_border)));
+            }
+            last_border = cur_pos;
+            in_marker = true;
+            marker_opener = text[cur_pos];
+        } else if (in_marker && is_marker_closer(marker_opener, text[cur_pos])) {
+            // no need to check because last_border will always be smaller
+                retval.push_back(segment(segment_type::MARKER, text.substr(last_border, cur_pos - last_border + 1)));
+            last_border = cur_pos + 1;
+            in_marker = false;
+            marker_opener = '\0';
+        }
+    }
+    if (last_border < text.length()) {
+            retval.push_back(segment(segment_type::TEXT, text.substr(last_border)));
+    }
+    return retval;
+}
+
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments) {
+    std::vector<segment> result;
+    for (const auto & seg : segments) {
+        if (!trim_whitespace(seg.value).empty()) {
+            result.push_back(seg);
+        }
+    }
+    return result;
+}
+
+namespace autoparser {
+
+std::string apply_template(const common_chat_template & tmpl, const template_params & params) {
+    generation_params tmpl_params;
+    tmpl_params.messages              = params.messages;
+    tmpl_params.tools                 = params.tools;
+    tmpl_params.add_generation_prompt = params.add_generation_prompt;
+    tmpl_params.enable_thinking       = params.enable_thinking;
+
+    if (params.extra_context) {
+        tmpl_params.extra_context = *params.extra_context;
+    }
+    tmpl_params.extra_context["enable_thinking"] = params.enable_thinking;
+
+    try {
+        return common_chat_template_direct_apply(tmpl, tmpl_params);
+    } catch (const std::exception & e) {
+        LOG_DBG("Template application failed: %s\n", e.what());
+        return "";
+    }
+}
+
+std::optional<compare_variants_result> compare_variants(
+    const common_chat_template &                   tmpl,
+    const template_params &                        params_A,
+    const std::function<void(template_params &)> & params_modifier) {
+    // Create variant B by copying A
+    template_params params_B = params_A;
+
+    // Apply modifier to create variant B
+    if (params_modifier) {
+        params_modifier(params_B);
+    }
+
+    // Apply template to both variants
+    std::string output_A = apply_template(tmpl, params_A);
+    std::string output_B = apply_template(tmpl, params_B);
+
+    // Check for template application failures
+    if (output_A.empty() || output_B.empty()) {
+        return std::nullopt;
+    }
+
+    // Calculate diff and return result with both outputs
+    compare_variants_result result;
+    result.diff     = calculate_diff_split(output_A, output_B);
+    result.output_A = output_A;
+    result.output_B = output_B;
+
+    return result;
+}
+
+}  // namespace autoparser
+
diff --git a/common/chat-auto-parser-helpers.h b/common/chat-auto-parser-helpers.h
new file mode 100644
index 0000000000..b8804ac191
--- /dev/null
+++ b/common/chat-auto-parser-helpers.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include "chat-auto-parser.h"
+
+#include <functional>
+#include <optional>
+#include <string>
+
+std::string trim_whitespace(const std::string & str);
+std::string trim_leading_whitespace(const std::string & str);
+std::string trim_trailing_whitespace(const std::string & str);
+std::string trim_trailing_newlines(const std::string & str);
+
+// calculate a diff split (longest common prefix, longest common suffix excluding prefix,
+// mismatched part on the left, mismatched part on the right) between two strings
+// account for markers - align prefix and suffix endings so that they end on markers
+// * eg.:
+// calculate_diff_split("<html><body><div></div></body></html>", "<html><body><p>Something</p></body><html>") ->
+//  { "prefix": "<html><body>" (not: "<html><body><"), "suffix": "</body></html>", "left": "<div></div>", "right": "<p>Something</p>" }
+// calculate_diff_split("<html><body>Something</body></html>", "<html><body></body><html>") ->
+//  { "prefix": "<html><body>", "suffix": "</body></html>", "left": "Something", "right": "" }
+diff_split calculate_diff_split(const std::string & left, const std::string & right);
+
+// Returns the prefix of `full` up until the first occurrence of the common prefix of `left` and `right`
+// Returns empty string if there's no common prefix
+// * eg.:
+// until_common_prefix("really want a FUNCTION call", "FUNCTION alpha", "FUNCTION beta") -> "really want a "
+// until_common_prefix("<tool_call>", "<something>", "<something_else>") -> ""
+// until_common_prefix("some text", "1234", "abcd") -> ""
+// until_common_prefix("one arg two args three args four", "argument alpha", "argument beta") -> "one ""
+std::string until_common_prefix(const std::string & full, const std::string & left, const std::string & right);
+
+// Returns the suffix of `full` after the last occurrence of the common suffix of `left` and `right`
+// Returns empty string if there's no common suffix
+// Mirror function of `until_common_prefix`
+// * eg.:
+// after_common_suffix("really want a FUNCTION call", "first FUNCTION", "second FUNCTION") -> " call"
+// after_common_suffix("one arg two-args three args four", "alpha-args", "beta-args") -> " three args four"
+std::string after_common_suffix(const std::string & full, const std::string & left, const std::string & right);
+
+// Segmentize text into markers and non-marker fragments
+// * eg.:
+// segmentize_markers("<html><head><title>The site title</title><body><div>Here's some <b>content</b></div></body></html>" ->
+//  [ (MARKER, "<html>"), (MARKER, "<head>"), (MARKER, "<title>"), (TEXT, "The site title"), (MARKER, "</title>"),
+//    (MARKER, "<body>"), (MARKER, "<div>"), (TEXT, "Here's some "), (MARKER, "<b>"), (TEXT, "content"), (MARKER, "</b>"),
+//    (MARKER, "</div>"), (MARKER, "</body>"), (MARKER, "</html>")
+//  ]
+// segmentize_markers("<|tool_call|>[args]{ are here }[/args]<|tool_call_end|>") ->
+//  [ (MARKER, "<|tool_call|>"), (MARKER, "[args]"), (TEXT, "{ are here }"), (MARKER, "[/args]"), (MARKER, "<|tool_call_end|>") ]
+std::vector<segment> segmentize_markers(const std::string & text);
+
+// Prune whitespace-only segments from a vector of segments
+// * eg.:
+// segmentize_markers("<tool_call>\n<function=foo>\n<arg=bar>\n   \n</arg>\n</function>\n</tool_call>") ->
+//  X = [ (MARKER, "<tool_call>"), (TEXT, "\n"), (MARKER, "<function=foo>"), (TEXT, "\n"), (MARKER, "<arg=bar>"), (TEXT, "\n   \n"),
+//        (MARKER, "</arg>"), (TEXT, "\n"), (MARKER, "</function>"), (TEXT, "\n"), (MARKER, "</tool_call>") ]
+// prune_whitespace_segments(X) -> [ (MARKER, "<tool_call>"), (MARKER, "<function=foo>"), (MARKER, "<arg=bar>"), (MARKER, "</arg>"),
+//                                   (MARKER, "</function>"), (MARKER, "</tool_call>") ]
+std::vector<segment> prune_whitespace_segments(const std::vector<segment> & segments);
+
+namespace autoparser {
+
+// Apply a template with the given parameters, returning the rendered string (empty on failure)
+std::string apply_template(const common_chat_template & tmpl, const template_params & params);
+
+// Factorized differential comparison function
+// Takes base params and a single modifier lambda to create variant B
+// Returns compare_variants_result containing diff and both outputs, or std::nullopt on failure
+std::optional<compare_variants_result> compare_variants(
+    const common_chat_template &                   tmpl,
+    const template_params &                        params_A,
+    const std::function<void(template_params &)> & params_modifier);
+
+}  // namespace autoparser
diff --git a/common/chat-auto-parser.h b/common/chat-auto-parser.h
new file mode 100644
index 0000000000..99dd9f063c
--- /dev/null
+++ b/common/chat-auto-parser.h
@@ -0,0 +1,434 @@
+#pragma once
+
+#include "chat.h"
+#include "common.h"
+#include "jinja/caps.h"
+#include "peg-parser.h"
+#include "nlohmann/json.hpp"
+
+#include <chrono>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+using json = nlohmann::ordered_json;
+
+class common_chat_peg_builder;
+
+// ============================================================================
+// Parameters for template application (low-level, used by diff analysis)
+// ============================================================================
+struct template_params {
+    json                messages;
+    json                tools;
+    bool                add_generation_prompt = false;
+    bool                enable_thinking       = true;
+    std::optional<json> extra_context         = std::nullopt;
+};
+
+struct diff_split {
+    std::string prefix;
+    std::string suffix;
+    std::string left;
+    std::string right;
+
+    bool operator==(struct diff_split & other) const {
+        return prefix == other.prefix && suffix == other.suffix && left == other.left && right == other.right;
+    }
+};
+
+// Result of compare_variants containing diff and original outputs
+struct compare_variants_result {
+    diff_split  diff;
+    std::string output_A;
+    std::string output_B;
+};
+
+namespace autoparser {
+
+// ============================================================================
+// High-level params for parser generation
+// ============================================================================
+
+struct generation_params {
+    json                                  messages;
+    json                                  tools;
+    common_chat_tool_choice               tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    json                                  json_schema;
+    bool                                  parallel_tool_calls = true;
+    common_reasoning_format               reasoning_format    = COMMON_REASONING_FORMAT_AUTO;
+    bool                                  stream              = true;
+    std::string                           grammar;
+    bool                                  add_generation_prompt = false;
+    bool                                  enable_thinking       = true;
+    std::chrono::system_clock::time_point now                   = std::chrono::system_clock::now();
+    std::string                           generation_prompt;
+    json                                  extra_context;
+    bool                                  add_bos       = false;
+    bool                                  add_eos       = false;
+    bool                                  is_inference  = true;
+    bool                                  add_inference = false;
+    bool                                  mark_input    = true;  // whether to mark input strings in the jinja context
+};
+
+// ============================================================================
+// Analysis Result Enums
+// ============================================================================
+
+// Reasoning handling mode (derived from R1-R3 comparisons)
+enum class reasoning_mode {
+    NONE,           // No reasoning markers detected
+    TAG_BASED,      // Tag-based: <think>...</think> (start can be empty for delimiter-style)
+    TOOLS_ONLY      // Only reason on tool calls, not on normal content
+};
+
+inline std::ostream & operator<<(std::ostream & os, const reasoning_mode & mode) {
+    switch (mode) {
+        case reasoning_mode::NONE:
+            return os << "NONE";
+        case reasoning_mode::TAG_BASED:
+            return os << "TAG_BASED";
+        case reasoning_mode::TOOLS_ONLY:
+            return os << "TOOLS_ONLY";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Content wrapping mode (derived from C1 comparison)
+enum class content_mode {
+    PLAIN,                   // No content markers
+    ALWAYS_WRAPPED,          // Content always wrapped with markers
+    WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
+};
+
+inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
+    switch (mode) {
+        case content_mode::PLAIN:
+            return os << "PLAIN";
+        case content_mode::ALWAYS_WRAPPED:
+            return os << "ALWAYS_WRAPPED";
+        case content_mode::WRAPPED_WITH_REASONING:
+            return os << "WRAPPED_WITH_REASONING";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Call ID position in tool calls (for non-JSON formats)
+enum class call_id_position {
+    NONE,                   // No call ID support detected
+    PRE_FUNC_NAME,          // Call ID before function name: [CALL_ID]id[FUNC]name{args}
+    BETWEEN_FUNC_AND_ARGS,  // Call ID between function and args: [FUNC]name[CALL_ID]id{args}
+    POST_ARGS,              // Call ID after arguments: [FUNC]name{args}[CALL_ID]id
+};
+
+inline std::ostream & operator<<(std::ostream & os, const call_id_position & pos) {
+    switch (pos) {
+        case call_id_position::NONE:
+            return os << "NONE";
+        case call_id_position::PRE_FUNC_NAME:
+            return os << "PRE_FUNC_NAME";
+        case call_id_position::BETWEEN_FUNC_AND_ARGS:
+            return os << "BETWEEN_FUNC_AND_ARGS";
+        case call_id_position::POST_ARGS:
+            return os << "POST_ARGS";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// Tool call format classification (derived from T1-T5, A1-A3 comparisons)
+enum class tool_format {
+    NONE,             // No tool support detected
+    JSON_NATIVE,      // Pure JSON: {"name": "X", "arguments": {...}}
+    TAG_WITH_JSON,    // Tag-based with JSON args: <function=X>{...}</function>
+    TAG_WITH_TAGGED,  // Tag-based with tagged args: <param=key>value</param>
+};
+
+inline std::ostream & operator<<(std::ostream & os, const tool_format & format) {
+    switch (format) {
+        case tool_format::NONE:
+            return os << "NONE";
+        case tool_format::JSON_NATIVE:
+            return os << "JSON_NATIVE";
+        case tool_format::TAG_WITH_JSON:
+            return os << "TAG_WITH_JSON";
+        case tool_format::TAG_WITH_TAGGED:
+            return os << "TAG_WITH_TAGGED";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+// ============================================================================
+// Sub-structs for tool analysis
+// ============================================================================
+
+struct tool_format_analysis {
+    tool_format mode = tool_format::NONE;
+
+    std::string section_start;   // e.g., "<tool_call>", "[TOOL_CALLS]", ""
+    std::string section_end;     // e.g., "</tool_call>", ""
+    std::string per_call_start;  // e.g., "<|tool_call_begin|>", "" (for multi-call templates)
+    std::string per_call_end;    // e.g., "<|tool_call_end|>", ""
+
+    bool fun_name_is_key = false;       // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
+    bool tools_array_wrapped = false;   // Tool calls wrapped in JSON array [...]
+
+    std::string              function_field = "function";
+    std::string              name_field     = "name";
+    std::string              args_field     = "arguments";
+    std::string              id_field;
+    std::string              gen_id_field;
+    std::vector<std::string> parameter_order;
+};
+
+struct tool_function_analysis {
+    std::string name_prefix;  // e.g., "<function=", "\"name\": \"", "functions."
+    std::string name_suffix;  // e.g., ">", "\"", ":0"
+    std::string close;        // e.g., "</function>", "" (for tag-based)
+};
+
+struct tool_arguments_analysis {
+    std::string start;          // e.g., "<|tool_call_argument_begin|>", "<args>"
+    std::string end;            // e.g., "<|tool_call_argument_end|>", "</args>"
+    std::string name_prefix;   // e.g., "<param=", "<arg_key>", "\""
+    std::string name_suffix;   // e.g., ">", "</arg_key>", "\":"
+    std::string value_prefix;  // e.g., "", "<arg_value>", ""
+    std::string value_suffix;  // e.g., "</param>", "</arg_value>", ""
+    std::string separator;     // e.g., "", "\n", ","
+};
+
+struct tool_id_analysis {
+    call_id_position pos = call_id_position::NONE;
+
+    std::string prefix;  // e.g., "[CALL_ID]" (marker before call ID value)
+    std::string suffix;  // e.g., "" (marker after call ID value, before next section)
+};
+
+// ============================================================================
+// Parser build context (shared interface for build_parser methods)
+// ============================================================================
+
+struct analyze_content;
+struct analyze_reasoning;
+
+struct parser_build_context {
+    common_chat_peg_builder & p;
+    const generation_params &         inputs;
+    common_peg_parser                 reasoning_parser;
+    bool                              extracting_reasoning = false;
+    const analyze_reasoning *         reasoning            = nullptr;
+    const analyze_content *           content              = nullptr;
+
+    parser_build_context(common_chat_peg_builder & p, const generation_params & inputs);
+};
+
+// ============================================================================
+// Base class for analyzers with parser building
+// ============================================================================
+
+struct analyze_base {
+    virtual ~analyze_base() = default;
+    virtual common_peg_parser build_parser(parser_build_context & ctx) const = 0;
+
+  protected:
+    const common_chat_template * tmpl = nullptr;
+
+    analyze_base() = default;
+    explicit analyze_base(const common_chat_template & tmpl) : tmpl(&tmpl) {}
+};
+
+// ============================================================================
+// Reasoning analyzer
+// ============================================================================
+
+struct analyze_reasoning : analyze_base {
+    reasoning_mode mode = reasoning_mode::NONE;
+
+    std::string start;  // e.g., "<think>", "[THINK]", "<|START_THINKING|>", ""
+    std::string end;    // e.g., "</think>", "[BEGIN FINAL RESPONSE]", "<|END_THINKING|>"
+
+    analyze_reasoning() = default;
+    analyze_reasoning(const common_chat_template & tmpl, bool supports_tools);
+    analyze_reasoning(std::string start_, std::string end_) : start(std::move(start_)), end(std::move(end_)) {}
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+  private:
+    // Look for reasoning markers in rendered content
+    void compare_reasoning_presence();
+
+    // Compare generation prompt with enable_thinking=true vs false
+    void compare_thinking_enabled();
+
+    // Check if reasoning is always possible or only in tool calls
+    void compare_reasoning_scope();
+};
+
+// ============================================================================
+// Content analyzer
+// ============================================================================
+
+struct analyze_content : analyze_base {
+    content_mode mode = content_mode::PLAIN;
+
+    std::string start;  // e.g., "<response>", ">>>all\n", ""
+    std::string end;    // e.g., "</response>", ""
+
+    bool requires_nonnull_content = false;
+
+    analyze_content() = default;
+    analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning);
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+    bool is_always_wrapped() const;
+    common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
+};
+
+// ============================================================================
+// Tool analyzer
+// ============================================================================
+
+struct analyze_tools : analyze_base {
+    tool_format_analysis    format;
+    tool_function_analysis  function;
+    tool_arguments_analysis arguments;
+    tool_id_analysis        call_id;
+
+    analyze_tools() = default;
+    analyze_tools(const common_chat_template & tmpl,
+                  const jinja::caps &          caps,
+                  const analyze_reasoning &    reasoning);
+
+    common_peg_parser build_parser(parser_build_context & ctx) const override;
+
+  private:
+    // Extract tool calling 'haystack' for further analysis and delegate further analysis based on format
+    void analyze_tool_calls(const analyze_reasoning & reasoning);
+
+    // Analyze format based on position of function and argument name in needle
+    void analyze_tool_call_format(const std::string &       haystack,
+                                  const std::string &       fun_name_needle,
+                                  const std::string &       arg_name_needle,
+                                  const analyze_reasoning & reasoning);
+
+    // Analyze specifics of JSON native format (entire tool call is a JSON object)
+    void analyze_tool_call_format_json_native(const std::string & clean_haystack,
+                                              const std::string & fun_name_needle,
+                                              const std::string & arg_name_needle);
+
+    // Analyze specifics of non-JSON native format (tags for function name or for function name and arguments)
+    void analyze_tool_call_format_non_json(const std::string & clean_haystack,
+                                           const std::string & fun_name_needle);
+
+    // Check for and extract specific per-call markers for non-native-JSON templates with parallel call support
+    void check_per_call_markers();
+
+    // Extract function name markers
+    void extract_function_markers();
+
+    // Delegates to separate functions for: separator analysis, argument name analysis, argument value analysis
+    void analyze_arguments();
+
+    // Extract argument name markers
+    void extract_argument_name_markers();
+
+    // Extract argument value markers
+    void extract_argument_value_markers();
+
+    // Extract argument separator, if specified (eg. <arg=foo>...</arg><sep><arg=bar>...</arg>)
+    void extract_argument_separator();
+
+    // Extract argument wrapper markers, if present (eg. '<args><arg=foo>...</arg><arg=bar>...</arg></args>')
+    void extract_args_markers();
+
+    // Extract call ID markers, if present
+    void extract_call_id_markers();
+
+    // Per-format tool parser builders
+    common_peg_parser build_tool_parser_json_native(parser_build_context & ctx) const;
+    common_peg_parser build_tool_parser_tag_json(parser_build_context & ctx) const;
+    common_peg_parser build_tool_parser_tag_tagged(parser_build_context & ctx) const;
+
+    // Shared helper: builds func_parser from open+call_id+args, handling atomic wrapping and close.
+    // atomic_peek: if present, used as the peek expression in the third atomicity branch.
+    common_peg_parser build_func_parser(common_chat_peg_builder & p, const std::string & name,
+                                        const common_peg_parser & call_id_section, bool have_call_id,
+                                        const common_peg_parser & args,
+                                        std::optional<common_peg_parser> atomic_peek) const;
+};
+
+// ============================================================================
+// Main autoparser class
+// ============================================================================
+
+struct autoparser {
+    jinja::caps          jinja_caps;
+    analyze_reasoning    reasoning;
+    analyze_content      content;
+    analyze_tools        tools;
+    bool                 analysis_complete = false;
+
+    // Preserved tokens for tokenizer (union of all non-empty markers)
+    std::vector<std::string> preserved_tokens;
+
+    autoparser() = default;
+
+    // Run full differential analysis on a template
+    void analyze_template(const common_chat_template & tmpl);
+
+    // Build the PEG parser for this template
+    common_peg_arena build_parser(const generation_params & inputs) const;
+
+  private:
+    // Collect tokens from entire analysis to preserve
+    void collect_preserved_tokens();
+};
+
+// ============================================================================
+// Parser generator
+// ============================================================================
+
+class peg_generator {
+  public:
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct generation_params & inputs);
+
+    static common_chat_params generate_parser(const common_chat_template &    tmpl,
+                                              const struct generation_params & inputs,
+                                              const autoparser &              autoparser);
+};
+
+}  // namespace autoparser
+
+enum segment_type { TEXT, MARKER };
+
+inline std::ostream & operator<<(std::ostream & os, const segment_type & type) {
+    switch (type) {
+        case segment_type::TEXT:
+            return os << "TEXT";
+        case segment_type::MARKER:
+            return os << "MARKER";
+        default:
+            return os << "UNKNOWN";
+    }
+}
+
+struct segment {
+    segment_type type;
+    std::string  value;
+
+    segment(segment_type type, std::string value) : type(type), value(std::move(value)) {}
+
+    bool operator==(const segment & other) const {
+        return type == other.type && value == other.value;
+    }
+
+    bool operator!=(const segment & other) const {
+        return !(*this == other);
+    }
+};
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp
new file mode 100644
index 0000000000..fa3e368098
--- /dev/null
+++ b/common/chat-diff-analyzer.cpp
@@ -0,0 +1,1355 @@
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <algorithm>
+
+#define ANSI_RESET  "\033[0m"
+#define ANSI_PURPLE "\033[1m\x1b[38;5;126m"
+#define ANSI_ORANGE "\033[1m\x1b[38;5;214m"
+#define ANSI_RED    "\033[1m\x1b[38;5;196m"
+
+using json = nlohmann::ordered_json;
+
+namespace autoparser {
+
+static const std::string FUN_FIRST = "FFF_FIRST_FUN_F";
+static const std::string FUN_SECOND = "SSS_SECOND_FUN_S";
+static const std::string ARG_FIRST = "AA_ARG_FST_AA";
+static const std::string ARG_SECOND = "BB_ARG_SND_BB";
+static const std::string USER_MSG = "U_USER_MSG Hello END_U";
+static const std::string ASSISTANT_MSG = "A_ASST_MSG I can help END_A";
+static const std::string THINKING_CONTENT = "REASON_PART I am thinking END_R";
+static const std::string CALL_ID_001 = "call00001";
+static const std::string CALL_ID_002 = "call00002";
+static const std::string CALL_ID_999 = "call99999";
+
+static std::vector<std::function<void(const common_chat_template & tmpl, autoparser &)>> workarounds(
+    { // Old reasoning Qwen templates - they don't really display reasoning content, but we still want to
+      // support reasoning on them
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("content.split('</think>')") != std::string::npos &&
+              tmpl.src.find("reasoning_content") == std::string::npos &&
+              tmpl.src.find("<SPECIAL_12>") == std::string::npos &&
+              analysis.reasoning.mode == reasoning_mode::NONE) {
+              analysis.reasoning.mode  = reasoning_mode::TAG_BASED;
+              analysis.reasoning.start = "<think>";
+              analysis.reasoning.end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
+          }
+      },
+      // Granite 3.3, with separate reasoning and content markers
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
+                            "<response></response>") != std::string::npos) {
+              analysis.reasoning.mode  = reasoning_mode::TAG_BASED;
+              analysis.reasoning.start = "<think>";
+              analysis.reasoning.end   = "</think>";
+              analysis.preserved_tokens.push_back("<think>");
+              analysis.preserved_tokens.push_back("</think>");
+              analysis.content.mode  = content_mode::WRAPPED_WITH_REASONING;
+              analysis.content.start = "<response>";
+              analysis.content.end   = "</response>";
+              analysis.preserved_tokens.push_back("<response>");
+              analysis.preserved_tokens.push_back("</response>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Granite 3.3]\n" ANSI_RESET);
+          }
+      },
+      // Cohere Command R+ - content wrapped in <|CHATBOT_TOKEN|>...<|END_OF_TURN_TOKEN|>
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("<|CHATBOT_TOKEN|>") != std::string::npos &&
+              tmpl.src.find("<|END_OF_TURN_TOKEN|>") != std::string::npos && analysis.content.start.empty()) {
+              analysis.content.mode  = content_mode::ALWAYS_WRAPPED;
+              analysis.content.start = "<|CHATBOT_TOKEN|>";
+              analysis.content.end   = "<|END_OF_TURN_TOKEN|>";
+              analysis.preserved_tokens.push_back("<|CHATBOT_TOKEN|>");
+              analysis.preserved_tokens.push_back("<|END_OF_TURN_TOKEN|>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Cohere Command R+]\n" ANSI_RESET);
+          }
+      },
+      // Functionary - no tool call section delimiter
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find("set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", "
+                            "\"code_interpreter\") | list | length > 0") != std::string::npos) {
+              analysis.content.mode                = content_mode::PLAIN;
+              analysis.content.end                 = "";
+              analysis.tools.function.name_prefix  = "";
+              analysis.tools.format.section_start  = "";
+              analysis.tools.format.section_end    = "";
+              analysis.tools.format.per_call_start = "<function=";
+              analysis.tools.format.per_call_end   = "</function>";
+              analysis.tools.function.close        = "";
+              analysis.preserved_tokens.clear();
+              analysis.preserved_tokens.push_back("<|eot_id|>");
+              analysis.preserved_tokens.push_back("<|eom_id|>");
+              analysis.preserved_tokens.push_back("<function=");
+              analysis.preserved_tokens.push_back(">");
+              analysis.preserved_tokens.push_back("</function>");
+              LOG_DBG(ANSI_ORANGE "[Patch: Functionary 3.1]\n" ANSI_RESET);
+          }
+      },
+      // DeepSeek-R1-Distill-Qwen
+      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
+          if (tmpl.src.find(
+                  "{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>'") !=
+              std::string::npos) {
+              analysis.tools.format.section_start  = "<｜tool▁calls▁begin｜>";
+              analysis.tools.format.section_end    = "<｜tool▁calls▁end｜>";
+              analysis.tools.format.per_call_start = "<｜tool▁call▁begin｜>function";
+              analysis.tools.function.name_prefix  = "<｜tool▁sep｜>";
+              analysis.tools.format.per_call_end   = "<｜tool▁call▁end｜>";
+              analysis.tools.function.close        = "```";
+              LOG_DBG(ANSI_ORANGE "[Patch: DeepSeek-R1-Distill-Qwen]\n" ANSI_RESET);
+          }
+      }
+    });
+
+// Common JSON structures
+static json params_schema = {
+    { "type",       "object"                                                           },
+    { "properties",
+     { { ARG_FIRST, { { "type", "string" }, { "description", "First argument" } } },
+        { ARG_SECOND, { { "type", "string" }, { "description", "Second argument" } } } } },
+    { "required",   json::array({})                                                    }
+};
+
+static json tools = json::array({
+    { { "type", "function" },
+     { "function",
+        json{ { "name", FUN_FIRST }, { "description", "Test function foo" }, { "parameters", params_schema } } } },
+    { { "type", "function" },
+     { "function",
+        json{ { "name", FUN_SECOND }, { "description", "Test function bar" }, { "parameters", params_schema } } } }
+});
+
+static json user_msg = json{
+    { "role",    "user"  },
+    { "content", USER_MSG }
+};
+
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = CALL_ID_001) {
+    return json{
+        { "id",       id                                              },
+        { "type",     "function"                                      },
+        { "function", json{ { "name", name }, { "arguments", args } } }
+    };
+}
+
+static json first_tool_call_zero_args         = build_tool_call(FUN_FIRST, json::object(), CALL_ID_001);
+static json first_tool_call_one_arg           = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "XXXX" }}, CALL_ID_001);
+static json first_tool_call_one_arg_other_val = build_tool_call(FUN_FIRST, {{ ARG_FIRST, "YYYY" }}, CALL_ID_001);
+static json first_tool_call_other_arg         = build_tool_call(FUN_FIRST, {{ ARG_SECOND, "YYYY" }}, CALL_ID_001);
+
+static json first_tool_call =
+    build_tool_call(FUN_FIRST, json{{ ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, CALL_ID_001);
+static json second_tool_call =
+    build_tool_call(FUN_SECOND, json{ { ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, CALL_ID_002);
+static json first_tool_call_alt_id =
+    build_tool_call(FUN_FIRST, json{{ ARG_FIRST,  "XXXX" }, { ARG_SECOND, "YYYY" }}, CALL_ID_999);
+
+template <typename T>
+static std::string mode_to_str(T mode) {
+    std::ostringstream os;
+    os << mode;
+    return os.str();
+}
+
+void autoparser::analyze_template(const common_chat_template & tmpl) {
+    jinja_caps = tmpl.original_caps();
+    reasoning = analyze_reasoning(tmpl, jinja_caps.supports_tool_calls);
+    content = analyze_content(tmpl, reasoning);
+    tools = analyze_tools(jinja_caps.supports_tool_calls ? analyze_tools(tmpl, jinja_caps, reasoning) : analyze_tools());
+    collect_preserved_tokens();
+
+    for (auto & workaround : workarounds) {
+        workaround(tmpl, *this);
+    }
+
+    LOG_DBG("\n--- Reasoning & Content Structure ---\n");
+    LOG_DBG("reasoning_mode: %s\n", mode_to_str(reasoning.mode).c_str());
+    LOG_DBG("reasoning_start: '%s'\n", reasoning.start.c_str());
+    LOG_DBG("reasoning_end: '%s'\n", reasoning.end.c_str());
+    LOG_DBG("content_mode: %s\n", mode_to_str(content.mode).c_str());
+    LOG_DBG("content_start: '%s'\n", content.start.c_str());
+    LOG_DBG("content_end: '%s'\n", content.end.c_str());
+
+    LOG_DBG("\n--- Tool Call Structure ---\n");
+    LOG_DBG("tool_mode: %s\n", mode_to_str(tools.format.mode).c_str());
+    LOG_DBG("supports_tools: %s\n", jinja_caps.supports_tools ? "true" : "false");
+    LOG_DBG("supports_parallel_calls: %s\n", jinja_caps.supports_parallel_tool_calls ? "true" : "false");
+    LOG_DBG("tool_section_start: '%s'\n", tools.format.section_start.c_str());
+    LOG_DBG("tool_section_end: '%s'\n", tools.format.section_end.c_str());
+    LOG_DBG("per_call_start: '%s'\n", tools.format.per_call_start.c_str());
+    LOG_DBG("per_call_end: '%s'\n", tools.format.per_call_end.c_str());
+    LOG_DBG("func_name_prefix: '%s'\n", tools.function.name_prefix.c_str());
+    LOG_DBG("func_name_suffix: '%s'\n", tools.function.name_suffix.c_str());
+    LOG_DBG("func_close: '%s'\n", tools.function.close.c_str());
+    LOG_DBG("call_id_prefix: '%s'\n", tools.call_id.prefix.c_str());
+    LOG_DBG("call_id_suffix: '%s'\n", tools.call_id.suffix.c_str());
+    LOG_DBG("call_id_pos: '%s'\n", mode_to_str(tools.call_id.pos).c_str());
+    LOG_DBG("args_start: '%s'\n", tools.arguments.start.c_str());
+    LOG_DBG("args_end: '%s'\n", tools.arguments.end.c_str());
+    LOG_DBG("arg_name_prefix: '%s'\n", tools.arguments.name_prefix.c_str());
+    LOG_DBG("arg_name_suffix: '%s'\n", tools.arguments.name_suffix.c_str());
+    LOG_DBG("arg_value_prefix: '%s'\n", tools.arguments.value_prefix.c_str());
+    LOG_DBG("arg_value_suffix: '%s'\n", tools.arguments.value_suffix.c_str());
+    LOG_DBG("name_field: '%s'\n", tools.format.name_field.c_str());
+    LOG_DBG("args_field: '%s'\n", tools.format.args_field.c_str());
+    LOG_DBG("id_field: '%s'\n", tools.format.id_field.c_str());
+    LOG_DBG("gen_id_field: '%s'\n", tools.format.gen_id_field.c_str());
+    LOG_DBG("parameter_order: '%s'\n", std::accumulate(tools.format.parameter_order.begin(), tools.format.parameter_order.end(),
+        std::string(""), [] (const std::string & a, const std::string & b) { return a.empty() ? b : a + ", " + b; }
+        ).c_str());
+
+    LOG_DBG(ANSI_PURPLE "=== Differential analysis complete ===\n" ANSI_RESET);
+    analysis_complete = true;
+}
+
+void autoparser::collect_preserved_tokens() {
+    auto add_token = [this](const std::string & org_token) {
+        std::string token = trim_whitespace(org_token);
+        if (!token.empty()) {
+            // Avoid duplicates
+            if (std::find(preserved_tokens.begin(), preserved_tokens.end(), token) == preserved_tokens.end()) {
+                preserved_tokens.push_back(token);
+            }
+        }
+    };
+
+    add_token(reasoning.start);
+    add_token(reasoning.end);
+    add_token(content.start);
+    add_token(content.end);
+    add_token(tools.format.section_start);
+    add_token(tools.format.section_end);
+    add_token(tools.format.per_call_start);
+    add_token(tools.format.per_call_end);
+    add_token(tools.function.name_prefix);
+    add_token(tools.function.name_suffix);
+    add_token(tools.function.close);
+    add_token(tools.arguments.start);
+    add_token(tools.arguments.end);
+    add_token(tools.arguments.name_prefix);
+    add_token(tools.arguments.name_suffix);
+    add_token(tools.arguments.separator);
+    add_token(tools.arguments.value_prefix);
+    add_token(tools.arguments.value_suffix);
+    add_token(tools.call_id.prefix);
+    add_token(tools.call_id.suffix);
+}
+
+analyze_reasoning::analyze_reasoning(const common_chat_template & tmpl, bool supports_tools)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_PURPLE "=== Starting differential analysis ===\n" ANSI_RESET);
+    LOG_DBG(ANSI_ORANGE "Phase 1: Reasoning analysis\n" ANSI_RESET);
+
+    compare_reasoning_presence();
+    compare_thinking_enabled();
+    if (supports_tools) {
+        compare_reasoning_scope();
+    }
+}
+
+void analyze_reasoning::compare_reasoning_presence() {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", USER_MSG }
+    };
+
+    json assistant_no_reasoning = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"                },
+        { "content",           ASSISTANT_MSG              },
+        { "reasoning_content", THINKING_CONTENT           }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_reasoning }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed, skipping reasoning detection\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    const std::string reasoning_content = THINKING_CONTENT;
+
+    if (!diff.right.empty() && diff.right.find(reasoning_content) != std::string::npos) {
+        auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())) + p.rest());
+        });
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        // try the more aggressive parse first, if it fails, fall back to the delimiter one
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (!result.result.success()) {
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+        }
+        if (result.result.success()) {
+            if (!result.tags["pre"].empty() && !result.tags["post"].empty()) {
+                mode = reasoning_mode::TAG_BASED;
+                start = trim_leading_whitespace(result.tags["pre"]);
+                end   = trim_trailing_whitespace(result.tags["post"]);
+            } else if (!result.tags["post"].empty()) {
+                mode = reasoning_mode::TAG_BASED;
+                end = trim_trailing_whitespace(result.tags["post"]);
+            }
+        }
+    }
+}
+
+void analyze_reasoning::compare_thinking_enabled() {
+    json user_msg = json{
+        { "role",    "user"  },
+        { "content", USER_MSG }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg });
+    params.add_generation_prompt = true;
+    params.enable_thinking       = false;
+
+    auto comparison = compare_variants(*tmpl, params, [&](template_params & p) { p.enable_thinking = true; });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET , __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    std::string left_trimmed = trim_whitespace(diff.left);
+    std::string right_trimmed = trim_whitespace(diff.right);
+
+    if (left_trimmed.empty() && !diff.right.empty()) {
+        if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
+            if (start.empty()) {
+                start = trim_leading_whitespace(diff.right);
+                mode  = reasoning_mode::TAG_BASED;
+            }
+        }
+    } else if (right_trimmed.empty() && !diff.left.empty()) {
+        if (!left_trimmed.empty() && string_ends_with(comparison->output_A, left_trimmed)) {
+            if (end.empty()) {
+                auto seg = prune_whitespace_segments(segmentize_markers(comparison->output_A));
+                if (seg.size() >= 2 && seg[seg.size() - 1].value == left_trimmed && seg[seg.size() - 2].type == segment_type::MARKER) {
+                    start = seg[seg.size() - 2].value;
+                }
+                end = trim_trailing_whitespace(diff.left);
+                mode = reasoning_mode::TAG_BASED;
+            }
+        }
+    } else if (!left_trimmed.empty() && !right_trimmed.empty()) {
+        // Full-output diff is noisy (e.g., SmolLM3 changes the system message when enable_thinking flips).
+        // Try to find reasoning markers by tail-anchoring:
+        // one output's generation prompt tail may appear in the other with extra reasoning markers appended.
+        const auto & output_A = comparison->output_A;
+        const auto & output_B = comparison->output_B;
+        const size_t anchor_len = 64;
+
+        for (int dir = 0; dir < 2; dir++) {
+            const auto & base     = dir == 0 ? output_B : output_A;
+            const auto & extended = dir == 0 ? output_A : output_B;
+
+            size_t len = std::min(base.size(), anchor_len);
+            std::string anchor = base.substr(base.size() - len);
+            auto pos = extended.rfind(anchor);
+            if (pos == std::string::npos || pos + len >= extended.size()) {
+                continue;
+            }
+
+            std::string extra = trim_whitespace(extended.substr(pos + len));
+            if (extra.empty()) {
+                continue;
+            }
+
+            auto seg = prune_whitespace_segments(segmentize_markers(extra));
+            if (seg.size() == 2 && seg[0].type == segment_type::MARKER && seg[1].type == segment_type::MARKER) {
+                if (start.empty()) {
+                    start = seg[0].value;
+                }
+                if (end.empty()) {
+                    end   = seg[1].value;
+                }
+                mode = reasoning_mode::TAG_BASED;
+                break;
+            }
+        }
+    }
+
+    if (mode == reasoning_mode::NONE && start.empty() && !end.empty()) {
+        mode = reasoning_mode::TAG_BASED;
+    }
+}
+
+void analyze_reasoning::compare_reasoning_scope() {
+    json assistant_reasoning_content = json{
+        { "role",              "assistant"      },
+        { "content",           ASSISTANT_MSG    },
+        { "reasoning_content", THINKING_CONTENT }
+    };
+
+    json assistant_reasoning_tools = json{
+        { "role",              "assistant"                                                                  },
+        { "content",           nullptr                                                                      },
+        { "reasoning_content", THINKING_CONTENT                                                             },
+        { "tool_calls",
+         json::array({ build_tool_call(FUN_FIRST, json{ { ARG_FIRST, "VVVV" }, { ARG_SECOND, "XXXX" } }) }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_reasoning_content });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_reasoning_tools }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    std::string reasoning_content = THINKING_CONTENT;
+
+    // Check if reasoning only appears in variant B (with tools)
+    bool reasoning_in_A = comparison->output_A.find(reasoning_content) != std::string::npos;
+    bool reasoning_in_B = comparison->output_B.find(reasoning_content) != std::string::npos;
+
+    if (!reasoning_in_A && reasoning_in_B) {
+        mode = reasoning_mode::TOOLS_ONLY;
+        LOG_DBG(ANSI_ORANGE "%s: Detected TOOLS_ONLY reasoning mode\n" ANSI_RESET, __func__);
+
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker() + p.space()) + p.literal(reasoning_content) + p.space() + p.tag("post", (p.marker() + p.space()));
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
+        if (result.result.success()) {
+            start = result.tags["pre"];
+            end = trim_trailing_whitespace(result.tags["post"]);
+        } else {
+            auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
+            });
+            result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
+            if (result.result.success()) {
+                end = trim_trailing_whitespace(result.tags["post"]);
+            } else {
+                LOG_DBG(ANSI_ORANGE "%s: Unable to extract reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
+                mode = reasoning_mode::NONE;
+            }
+        }
+    }
+}
+
+analyze_content::analyze_content(const common_chat_template & tmpl, const analyze_reasoning & reasoning)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_ORANGE "Phase 2: Content analysis\n" ANSI_RESET);
+
+    json assistant_content_only = json{
+        { "role",    "assistant"     },
+        { "content", ASSISTANT_MSG   }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ build_tool_call("test_func", json{ { "arg1", "value1" } }) }) }
+    };
+
+    json assistant_with_reasoning = json{
+        { "role",              "assistant"      },
+        { "content",           ""               },
+        { "reasoning_content", THINKING_CONTENT }
+    };
+
+    template_params params_content_only;
+    params_content_only.messages              = json::array({ user_msg, assistant_content_only });
+    params_content_only.add_generation_prompt = false;
+    params_content_only.enable_thinking       = true;
+    params_content_only.tools                 = tools;
+
+    auto comparison_with_tools = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_tools });
+    });
+
+    auto comparison_with_reasoning = compare_variants(tmpl, params_content_only, [&](template_params & p) {
+        p.messages = json::array({ user_msg, assistant_with_reasoning });
+    });
+
+    if (!comparison_with_tools || !comparison_with_reasoning) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff_tools     = comparison_with_tools->diff;
+    const auto & diff_reasoning = comparison_with_reasoning->diff;
+
+    std::string response = ASSISTANT_MSG;
+
+    bool found_plain_content = false;
+    if (trim_whitespace(diff_tools.left) == response) {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            return p.space() + diff_reasoning.left + p.space() + p.optional(p.marker()) + p.space() + p.end();
+        });
+        if (parser.parse_and_extract(diff_reasoning.left).result.success()) {
+            // We only have the content text in the diff (possibly with a stray EOG marker), so no markers
+            mode = content_mode::PLAIN;
+            found_plain_content = true;
+        } else if (reasoning.mode != reasoning_mode::NONE && !reasoning.end.empty()) {
+            auto post_reasoning_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.literal(reasoning.end) + p.space() + p.literal(response);
+            });
+            if (post_reasoning_parser.parse_anywhere_and_extract(diff_reasoning.left).result.success()) {
+                mode = content_mode::PLAIN;
+                found_plain_content = true;
+            }
+        }
+    }
+    if (!found_plain_content) {
+        std::string rdiff = diff_reasoning.left;
+        if (!reasoning.end.empty() && rdiff.find(reasoning.end) != std::string::npos) {
+            rdiff = rdiff.substr(rdiff.find(reasoning.end) + reasoning.end.length());
+        }
+        // Take the more promising diff
+        std::string pure_content = rdiff.length() > diff_tools.left.length() ? rdiff : diff_tools.left;
+        auto parser_wrapped = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.marker() + p.space()) + p.literal(response) + p.space() + p.tag("post", (p.marker() + p.space())) + p.rest();
+        });
+        auto result = parser_wrapped.parse_anywhere_and_extract(pure_content);
+        start = result.tags["pre"];
+        end = result.tags["post"];
+        // TODO: WRAPPED_WITH_REASONING
+    }
+
+    // Determine content mode
+    if (!start.empty() || !end.empty()) {
+        mode = content_mode::ALWAYS_WRAPPED;
+        // TODO: END_DELIMITED content mode - delimited at end but not at start?
+    }
+}
+
+bool analyze_content::is_always_wrapped() const {
+    return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
+}
+
+analyze_tools::analyze_tools(const common_chat_template & tmpl,
+                             const jinja::caps &          caps,
+                             const analyze_reasoning &    reasoning)
+    : analyze_base(tmpl) {
+    LOG_DBG(ANSI_ORANGE "Phase 3: Tool call analysis\n" ANSI_RESET);
+
+    analyze_tool_calls(reasoning);
+
+    if (format.mode != tool_format::NONE && format.mode != tool_format::JSON_NATIVE) {
+        if (caps.supports_parallel_tool_calls) {
+            check_per_call_markers();
+        }
+        LOG_DBG(ANSI_ORANGE "Phase 3a: Function call analysis\n" ANSI_RESET);
+        extract_function_markers();
+        LOG_DBG(ANSI_ORANGE "Phase 3b: Argument analysis\n" ANSI_RESET);
+        if (format.mode == tool_format::TAG_WITH_TAGGED) {
+            analyze_arguments();
+        }
+        extract_argument_separator();
+        extract_args_markers();
+        LOG_DBG(ANSI_ORANGE "Phase 3c: Call id analysis\n" ANSI_RESET);
+        extract_call_id_markers();
+    }
+}
+
+void analyze_tools::analyze_tool_calls(const analyze_reasoning & reasoning) {
+    json assistant_no_tools = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG }
+    };
+
+    json assistant_with_tools = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_tools });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_tools }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    std::string tool_section = diff.right;
+
+    if (tool_section.empty()) {
+        return;
+    }
+
+    analyze_tool_call_format(tool_section, FUN_FIRST, ARG_FIRST, reasoning);
+}
+
+void analyze_tools::analyze_tool_call_format(const std::string &       haystack,
+                                             const std::string &       fun_name_needle,
+                                             const std::string &       arg_name_needle,
+                                             const analyze_reasoning & reasoning) {
+    if (fun_name_needle.empty() || arg_name_needle.empty() || haystack.empty()) {
+        return;
+    }
+
+    auto in_json_haystack = [&haystack](const std::string & needle) -> bool {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) << p.choice({
+                p.tag("dq", p.literal("\"") + p.literal(needle) + p.literal("\"")) });
+        });
+        auto result = parser.parse_anywhere_and_extract(haystack);
+        return result.result.success();
+    };
+
+    auto fun_quote = in_json_haystack(fun_name_needle);
+    auto arg_quote = in_json_haystack(arg_name_needle);
+
+    if (fun_quote) {
+        // no need to check further, we're in JSON land
+        format.mode = tool_format::JSON_NATIVE;
+    } else if (arg_quote) {
+        format.mode = tool_format::TAG_WITH_JSON;
+    } else {
+        format.mode = tool_format::TAG_WITH_TAGGED;
+    }
+
+    // first, remove any reasoning markers
+    std::string clean_haystack = haystack;
+    if (!reasoning.start.empty()) {
+        auto pos = haystack.find(reasoning.start);
+        if (pos != std::string::npos) {
+            clean_haystack = haystack.substr(0, pos) + haystack.substr(pos + reasoning.start.length());
+        }
+    }
+    if (!reasoning.end.empty()) {
+        auto pos = clean_haystack.find(reasoning.end);
+        if (pos != std::string::npos) {
+            clean_haystack = clean_haystack.substr(0, pos) + clean_haystack.substr(pos + reasoning.end.length());
+        }
+    }
+
+    if (format.mode == tool_format::JSON_NATIVE) {
+        analyze_tool_call_format_json_native(clean_haystack, fun_name_needle, arg_name_needle);
+    } else {
+        analyze_tool_call_format_non_json(clean_haystack, fun_name_needle);
+    }
+    // always relax whitespace requirements on ending markers since they don't influence content
+    format.section_end  = trim_whitespace(format.section_end);
+    format.per_call_end = trim_whitespace(format.per_call_end);
+}
+
+void analyze_tools::analyze_tool_call_format_json_native(const std::string & clean_haystack,
+                                                         const std::string & fun_name_needle,
+                                                         const std::string & arg_name_needle) {
+    // we might not have the typical OpenAI tool calling structure
+    int  json_start     = clean_haystack.find_first_of('{');
+    int  json_end       = clean_haystack.find_last_of('}');
+    std::string cut     = clean_haystack.substr(json_start, json_end - json_start + 1);
+    json call_struct    = json::parse(cut);
+    auto register_field = [&](const std::string & prefix, const nlohmann::detail::iteration_proxy_value<json::iterator> & subel) {
+        if (subel.value().is_string() && std::string(subel.value()).find("call0000") != std::string::npos) {
+            format.id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().is_string() && std::string(subel.value()) == fun_name_needle) {
+            format.name_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.value().dump().find(arg_name_needle) !=
+                   std::string::npos) {  // handle both string and JSON obj variants
+            format.args_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        } else if (subel.key().find("id") != std::string::npos) {
+            // heuristics for generated id field
+            format.gen_id_field = !prefix.empty() ? prefix + "." + subel.key() : subel.key();
+        }
+    };
+    for (const auto & el : call_struct.items()) {
+        if (el.key() == fun_name_needle) {
+            format.fun_name_is_key = true;
+            // When function name is the key, there's no name field and args are direct
+            format.name_field.clear();
+            format.args_field.clear();
+            // Don't register this element - the function name IS the key, not a field
+        } else {
+            if (el.value().is_object() &&
+                el.value().dump().find(arg_name_needle) == std::string::npos) {  // not the args object
+                format.function_field = el.key();
+                for (const auto & subel : el.value().items()) {
+                    register_field(el.key(), subel);
+                }
+            }
+            // Register this element as a potential field
+            register_field("", el);
+        }
+    }
+    auto array_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("pre", p.literal("[") + p.space()) + p.literal(cut) + p.tag("post", p.space() + p.literal("]"));
+    });
+
+    auto ar_parse_res = array_parser.parse_anywhere_and_extract(clean_haystack);
+    if (ar_parse_res.result.success()) {
+        format.tools_array_wrapped = true;
+        json_start -= ar_parse_res.tags["pre"].length();
+        json_end += ar_parse_res.tags["post"].length();
+    }
+    json_end++; // we want to move past the closing char for end marker extraction
+
+    std::vector<std::pair<size_t, std::string>> located_params;
+    if (!format.name_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.name_field), format.name_field });
+    }
+    if (!format.args_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.args_field), format.args_field });
+    }
+    if (!format.id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.id_field), format.id_field });
+    }
+    if (!format.gen_id_field.empty()) {
+        located_params.push_back({ clean_haystack.find(format.gen_id_field), format.gen_id_field });
+    }
+    std::sort(located_params.begin(), located_params.end());
+    for (auto & pair : located_params) {
+        format.parameter_order.push_back(pair.second);
+    }
+    // we can immediately extract tool calling markers too
+    format.section_start = trim_leading_whitespace(clean_haystack.substr(0, json_start));
+    format.section_end   = trim_whitespace(clean_haystack.substr(json_end));
+    // When tools_array_wrapped is true, the closing bracket is part of the array structure,
+    // not a separate section end marker. Clear tool_section_end to avoid duplicate brackets.
+    if (format.tools_array_wrapped && format.section_end == "]") {
+        format.section_end.clear();
+    }
+}
+
+void analyze_tools::analyze_tool_call_format_non_json(const std::string & clean_haystack,
+                                                      const std::string & fun_name_needle) {
+    // first, let's find out if the function is inside a tag or standalone
+    auto fun_marker_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("fun_marker", p.choice({
+            p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", fun_name_needle })) + p.literal(fun_name_needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+            p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", fun_name_needle })) + p.literal(fun_name_needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+    });
+    auto fun_res = fun_marker_parser.parse_anywhere_and_extract(clean_haystack);
+    std::string fun_marker = fun_name_needle;
+    if (fun_res.result.success()) {
+        fun_marker = fun_res.tags["fun_marker"];
+    }
+    // now, consume up to two markers, then treat everything up to the function marker as function name prefix
+    auto per_tool_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("sec_start", p.marker() + p.space()) + p.tag("call_start", p.marker() + p.space()) +
+            p.tag("fun_pre", p.until(fun_marker)) + fun_marker + p.tag("rest", p.rest());
+    });
+    auto section_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+        return p.tag("sec_start", p.marker() + p.space()) + fun_marker + p.tag("rest", p.rest());
+    });
+    auto result = per_tool_parser.parse_anywhere_and_extract(clean_haystack);
+    tagged_parse_result result_end;
+    if (result.result.success()) {
+        auto double_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("call_end", p.marker() + p.space()) + p.tag("sec_end", p.marker() + p.space()) + p.end();
+        });
+        result_end = double_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+        function.name_prefix = fun_res.tags["fun_pre"] + function.name_prefix;
+    } else {
+        result = section_parser.parse_anywhere_and_extract(clean_haystack);
+        auto single_closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("sec_end", p.marker() + p.space()) + p.end();
+        });
+        result_end = single_closer_parser.parse_anywhere_and_extract(result.tags["rest"]);
+    }
+    format.per_call_start = result.tags["call_start"];
+    format.per_call_end = result_end.tags["call_end"];
+    format.section_start = result.tags["sec_start"];
+    format.section_end = result_end.tags["sec_end"];
+}
+
+void analyze_tools::check_per_call_markers() {
+    json assistant_one_tool = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_two_tools = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call, second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_tool });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto one_vs_two = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_tools }); });
+
+    if (!one_vs_two) {
+        LOG_DBG(ANSI_ORANGE "%s: Generating double tool call comparison failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    diff_split filter_common_call_part = calculate_diff_split(one_vs_two->diff.suffix, one_vs_two->diff.right);
+
+    std::string second_tool_content = trim_leading_whitespace(filter_common_call_part.right);
+    if (!format.section_start.empty() &&
+        second_tool_content.find(format.section_start) == 0) {
+        format.per_call_start = format.section_start;
+        format.per_call_end   = format.section_end;
+        format.section_start.clear();
+        format.section_end.clear();
+    }
+}
+
+void analyze_tools::extract_function_markers() {
+    json assistant_nocall = json{
+        { "role",    "assistant"   },
+        { "content", ASSISTANT_MSG },
+    };
+
+    json assistant_foofoo = json{
+        { "role",       "assistant"                      },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_barbar = json{
+        { "role",       "assistant"                       },
+        { "content",    ""                                },
+        { "tool_calls", json::array({ second_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_foofoo });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_barbar }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (diff.left.find(FUN_FIRST) != std::string::npos && diff.right.find(FUN_SECOND) != std::string::npos) {
+        std::string prefix_marker;
+        if (!format.per_call_start.empty()) {
+            prefix_marker = format.per_call_start;
+        } else {
+            prefix_marker = format.section_start;
+        }
+        if (!prefix_marker.empty() && diff.prefix.rfind(prefix_marker) != std::string::npos) {
+            function.name_prefix =
+                diff.prefix.substr(diff.prefix.rfind(prefix_marker) + prefix_marker.size());
+        }
+
+        // Extract name prefix/suffix from diff.left (stop at the next marker boundary)
+        auto name_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+            return p.tag("pre", p.until(FUN_FIRST)) + p.literal(FUN_FIRST) +
+                   p.tag("post", p.zero_or_more(p.negate(p.marker()) + p.any()));
+        });
+        auto name_result = name_parser.parse_and_extract(diff.left);
+        if (name_result.result.success()) {
+            function.name_prefix += name_result.tags["pre"];
+            function.name_suffix = name_result.tags["post"];
+        }
+
+        // Extend name_suffix with content from diff.suffix before args begin
+        if (format.mode == tool_format::TAG_WITH_JSON) {
+            // For JSON: name_suffix extends to the first non-marker { or [, including any
+            // markers along the way. Only applies if there's at least one marker after
+            // the JSON content (matching the original "stop < seg_suf.size() - 1" guard).
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                auto non_json = p.marker() | (p.negate(p.literal("{")) + p.negate(p.literal("[")) + p.any());
+                auto after_json = p.zero_or_more(p.negate(p.marker()) + p.any()) + p.marker();
+                return p.tag("ext", p.zero_or_more(non_json)) + after_json;
+            });
+            auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+            if (suf_result.result.success()) {
+                function.name_suffix += suf_result.tags["ext"];
+            }
+        } else {
+            // For tagged: name_suffix extends to the first marker (arg marker)
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                return p.tag("ext", p.zero_or_more(p.negate(p.marker()) + p.any()));
+            });
+            auto suf_result = suffix_parser.parse_and_extract(diff.suffix);
+            if (suf_result.result.success()) {
+                function.name_suffix += suf_result.tags["ext"];
+            }
+        }
+
+        // Extract the closer (between last arg and call/section end marker)
+        std::string suffix_marker;
+        if (!format.per_call_end.empty()) {
+            suffix_marker = format.per_call_end;
+        } else {
+            suffix_marker = format.section_end;
+        }
+        std::string closer_suffix;
+        if (suffix_marker.empty()) {
+            // we'll have to rely on an extra diff with no-calls version
+            auto notool_comp = compare_variants(
+                *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_nocall }); });
+            if (notool_comp) {
+                auto nt_diff  = notool_comp->diff;
+                closer_suffix = nt_diff.left.substr(nt_diff.left.find("YYYY") + 4);
+            }
+        } else {
+            closer_suffix = diff.suffix.substr(0, diff.suffix.find(suffix_marker));
+        }
+        if (!closer_suffix.empty()) {
+            if (format.mode == tool_format::TAG_WITH_TAGGED) {
+                // After last arg value, skip the closing arg marker, rest is closer
+                auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                    return p.until("YYYY") + p.literal("YYYY") + p.space() +
+                           p.marker() + p.space() +
+                           p.tag("close", p.rest());
+                });
+                auto close_result = closer_parser.parse_and_extract(closer_suffix);
+                if (close_result.result.success()) {
+                    function.close = close_result.tags["close"];
+                }
+            } else if (format.mode == tool_format::TAG_WITH_JSON) {
+                // After last arg value, find end of JSON args, rest is closer
+                auto closer_parser = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
+                    return p.until("YYYY") + p.literal("YYYY") + p.tag("post_val", p.rest());
+                });
+                auto close_result = closer_parser.parse_and_extract(closer_suffix);
+                if (close_result.result.success()) {
+                    const auto & post = close_result.tags["post_val"];
+                    size_t pos = post.find_last_of("}]");
+                    if (pos != std::string::npos && pos < post.size() - 1) {
+                        function.close = trim_leading_whitespace(post.substr(pos + 1));
+                    }
+                }
+            }
+        }
+        function.close = trim_leading_whitespace(function.close);
+    }
+}
+
+void analyze_tools::analyze_arguments() {
+    extract_argument_name_markers();
+    extract_argument_value_markers();
+}
+
+void analyze_tools::extract_argument_name_markers() {
+    json assistant_first_arg = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_second_arg = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_other_arg }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_first_arg });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_second_arg }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (!diff.left.empty() && !diff.right.empty()) {
+        // Parse both sides to find ARG_FIRST/ARG_SECOND and extract the surrounding structure
+        auto left_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            return p.tag("pre", p.until(ARG_FIRST)) + p.literal(ARG_FIRST) +
+                   p.tag("suffix", p.until_one_of({"\"", "X"}));
+        });
+        auto right_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            return p.tag("pre", p.until(ARG_SECOND)) + p.literal(ARG_SECOND) +
+                   p.tag("suffix", p.until_one_of({"\"", "Y"}));
+        });
+        auto left_result  = left_parser.parse_anywhere_and_extract(diff.left);
+        auto right_result = right_parser.parse_anywhere_and_extract(diff.right);
+
+        if (left_result.result.success() && right_result.result.success() &&
+            !left_result.tags["pre"].empty() &&
+            left_result.tags["pre"] == right_result.tags["pre"] &&
+            left_result.tags["suffix"] == right_result.tags["suffix"]) {
+            // Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
+            arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
+            arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
+        } else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
+            // Name is directly in the diff: prefix comes from last marker in diff.prefix
+            auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                auto last_marker = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+                return p.zero_or_more(p.negate(last_marker) + p.any()) + p.tag("name_prefix", last_marker);
+            });
+            auto pre_result = pre_parser.parse_and_extract(diff.prefix);
+            arguments.name_prefix = pre_result.result.success()
+                ? pre_result.tags["name_prefix"] : diff.prefix;
+
+            // Suffix extends from after ARG_FIRST to the first marker (+ optional whitespace).
+            // The marker could be in diff.left itself or in diff.suffix, so we concatenate.
+            std::string after_first = diff.left.substr(ARG_FIRST.length()) + diff.suffix;
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.tag("suffix", p.zero_or_more(p.negate(p.marker()) + p.any()) +
+                                       p.marker() + p.space());
+            });
+            auto suf_result = suffix_parser.parse_anywhere_and_extract(after_first);
+            if (suf_result.result.success()) {
+                arguments.name_suffix = suf_result.tags["suffix"];
+            }
+        }
+    }
+}
+
+void analyze_tools::extract_argument_value_markers() {
+    json assistant_val_X = json{
+        { "role",       "assistant"                              },
+        { "content",    ""                                       },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_val_Y = json{
+        { "role",       "assistant"                                        },
+        { "content",    ""                                                 },
+        { "tool_calls", json::array({ first_tool_call_one_arg_other_val }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_val_X });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_val_Y }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (diff.left == "XXXX" && diff.right == "YYYY") {
+        std::string arg_name_ending = ARG_FIRST + arguments.name_suffix;
+        std::string prefix          = diff.prefix;
+        if (prefix.rfind(arg_name_ending) != std::string::npos) {
+            prefix = prefix.substr(prefix.rfind(arg_name_ending) + arg_name_ending.size());
+        }
+        if (!prefix.empty()) {
+            // Find the last marker + any trailing non-marker text to end
+            auto prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                auto last_marker = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+                return p.zero_or_more(p.negate(last_marker) + p.any()) + p.tag("val_prefix", last_marker);
+            });
+            auto pre_result = prefix_parser.parse_and_extract(prefix);
+            arguments.value_prefix = pre_result.result.success() ? pre_result.tags["val_prefix"] : prefix;
+        }
+
+        std::string value_suffix = diff.suffix;
+        if (!function.close.empty()) {
+            size_t func_close_pos = value_suffix.find(function.close);
+            if (func_close_pos != std::string::npos) {
+                value_suffix = value_suffix.substr(0, func_close_pos);
+            }
+        } else if (!format.per_call_end.empty() || !format.section_end.empty()) {
+            std::string end_marker =
+                !format.per_call_end.empty() ? format.per_call_end : format.section_end;
+            size_t end_marker_pos = value_suffix.find(end_marker);
+            if (end_marker_pos != std::string::npos) {
+                value_suffix = value_suffix.substr(0, end_marker_pos);
+            }
+        }
+        value_suffix = trim_leading_whitespace(value_suffix);
+        if (!value_suffix.empty()) {
+            arguments.value_suffix = value_suffix;
+        }
+    }
+}
+
+void analyze_tools::extract_argument_separator() {
+    json assistant_one_arg = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    json assistant_two_args = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_one_arg });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_two_args }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (!diff.right.empty()) {
+        std::string separator        = until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND);
+        arguments.separator = separator;
+    }
+}
+
+void analyze_tools::extract_args_markers() {
+    json assistant_no_args = json{
+        { "role",       "assistant"},
+        { "content",    ""         },
+        { "tool_calls", json::array({ first_tool_call_zero_args }) }
+    };
+
+    json assistant_with_args = json{
+        { "role",       "assistant"},
+        { "content",    ""         },
+        { "tool_calls", json::array({ first_tool_call_one_arg }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_no_args });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_with_args }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (format.mode == tool_format::JSON_NATIVE) {
+        std::string prefix_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
+        std::string suffix_marker = !format.section_end.empty() ? format.section_end : format.per_call_end;
+        // these might happen earlier in the tools section as an example or somewhere else, so we need to find the closest ones
+        size_t prefix_pos = prefix_marker.empty() ? 0 : diff.prefix.rfind(prefix_marker);
+        size_t suffix_pos = suffix_marker.empty() ? diff.suffix.size() : diff.suffix.find(suffix_marker);
+        if (prefix_pos == std::string::npos) {
+            prefix_pos = 0;
+        }
+        if (suffix_pos == std::string::npos) {
+            suffix_pos = diff.suffix.size();
+        }
+        std::string prefix_cut = diff.prefix.substr(prefix_pos + prefix_marker.size());
+        std::string suffix_cut = diff.suffix.substr(0, suffix_pos);
+        std::string args_start = until_common_prefix(prefix_cut, "{}", "{\"first\":");
+        std::string args_end   = after_common_suffix(suffix_cut, "{}", "\"XXXX\"}");
+
+        if (!args_start.empty() || !args_end.empty()) {
+            size_t find_fun = args_start.find(FUN_FIRST);
+            if (find_fun != std::string::npos) {
+                args_start = args_start.substr(find_fun + FUN_FIRST.size(), args_start.size() - find_fun - FUN_FIRST.size());
+            }
+            size_t find_call_id = args_start.find(CALL_ID_001);
+            if (find_call_id != std::string::npos) {
+                args_start = args_start.substr(find_call_id + CALL_ID_001.size(), args_start.size() - find_call_id - CALL_ID_001.size());
+            }
+            arguments.start = args_start;
+            arguments.end   = args_end;
+        }
+    }
+}
+
+void analyze_tools::extract_call_id_markers() {
+    json assistant_id1 = json{
+        { "role",       "assistant" },
+        { "content",    ""                               },
+        { "tool_calls", json::array({ first_tool_call }) }
+    };
+
+    json assistant_id2 = json{
+        { "role",       "assistant" },
+        { "content",    ""          },
+        { "tool_calls", json::array({ first_tool_call_alt_id }) }
+    };
+
+    template_params params;
+    params.messages              = json::array({ user_msg, assistant_id1 });
+    params.tools                 = tools;
+    params.add_generation_prompt = false;
+    params.enable_thinking       = true;
+
+    auto comparison = compare_variants(
+        *tmpl, params, [&](template_params & p) { p.messages = json::array({ user_msg, assistant_id2 }); });
+
+    if (!comparison) {
+        LOG_DBG(ANSI_ORANGE "%s: Template application failed for call_id detection\n" ANSI_RESET, __func__);
+        return;
+    }
+
+    const auto & diff = comparison->diff;
+
+    if (diff.left.empty() && diff.right.empty()) {
+        return;
+    }
+
+    std::string id_value_1 = CALL_ID_001;
+    std::string id_value_2 = CALL_ID_999;
+
+    size_t common_id_prefix_len = 0;
+    for (size_t i = 0; i < std::min(id_value_1.length(), id_value_2.length()); i++) {
+        if (id_value_1[i] == id_value_2[i]) {
+            common_id_prefix_len++;
+        } else {
+            break;
+        }
+    }
+    std::string common_id_part = id_value_1.substr(0, common_id_prefix_len);
+
+    // Check if the function name is in the prefix (normal case: BETWEEN_FUNC_AND_ARGS or POST_ARGS)
+    // or in the suffix (call_id is PRE_FUNC_NAME)
+    std::string func_name           = FUN_FIRST;
+    size_t      func_name_in_prefix = diff.prefix.rfind(func_name);
+    size_t      func_name_in_suffix = diff.suffix.find(func_name);
+
+    // Helper: find the last marker in a string (returns just the marker, not trailing text)
+    auto find_last_marker = [](const std::string & str) -> std::string {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            auto last = p.marker() + p.zero_or_more(p.negate(p.marker()) + p.any()) + p.end();
+            return p.zero_or_more(p.negate(last) + p.any()) + p.tag("m", p.marker());
+        });
+        auto res = parser.parse_anywhere_and_extract(str);
+        return res.result.success() ? res.tags["m"] : "";
+    };
+
+    // Helper: find the first marker in a string
+    auto find_first_marker = [](const std::string & str) -> std::string {
+        auto parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+            return p.tag("m", p.marker());
+        });
+        auto res = parser.parse_anywhere_and_extract(str);
+        return res.result.success() ? res.tags["m"] : "";
+    };
+
+    if (func_name_in_prefix != std::string::npos && func_name_in_suffix == std::string::npos) {
+        // Function name is only in prefix - call_id is BETWEEN_FUNC_AND_ARGS or POST_ARGS
+        // Check if args indicator "{" is in prefix or suffix
+        size_t args_in_prefix = diff.prefix.find('{', func_name_in_prefix);
+        size_t args_in_suffix = diff.suffix.find('{');
+
+        if (args_in_suffix != std::string::npos &&
+            (args_in_prefix == std::string::npos || args_in_prefix > diff.prefix.length())) {
+            // Args are in suffix, so call_id is BETWEEN_FUNC_AND_ARGS
+            call_id.pos = call_id_position::BETWEEN_FUNC_AND_ARGS;
+
+            // Find call_id_prefix: marker immediately preceding common_id_part (no intervening markers)
+            std::string after_func = diff.prefix.substr(func_name_in_prefix + func_name.length());
+            auto id_prefix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.tag("prefix", p.marker()) +
+                       p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal(common_id_part)) + p.any()) +
+                       p.literal(common_id_part);
+            });
+            auto id_res = id_prefix_parser.parse_anywhere_and_extract(after_func);
+            if (id_res.result.success()) {
+                call_id.prefix = id_res.tags["prefix"];
+            } else {
+                // Fallback: use the last marker in after_func
+                call_id.prefix = find_last_marker(after_func);
+            }
+
+            // Extract call_id_suffix: the first marker in the suffix before args "{"
+            auto suffix_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
+                return p.zero_or_more(p.negate(p.marker()) + p.negate(p.literal("{")) + p.any()) +
+                       p.tag("suffix", p.marker());
+            });
+            auto suf_res = suffix_parser.parse_anywhere_and_extract(diff.suffix);
+            if (suf_res.result.success()) {
+                call_id.suffix = suf_res.tags["suffix"];
+            }
+        } else if (args_in_prefix != std::string::npos) {
+            // Args are in prefix, so call_id is POST_ARGS
+            call_id.pos = call_id_position::POST_ARGS;
+
+            // Extract last marker between args closing brace and the ID
+            std::string after_args    = diff.prefix.substr(args_in_prefix);
+            size_t      closing_brace = after_args.rfind('}');
+            if (closing_brace != std::string::npos) {
+                std::string between_args_and_id = after_args.substr(closing_brace + 1);
+                call_id.prefix = find_last_marker(between_args_and_id);
+            }
+
+            // call_id_suffix: first marker in diff.suffix
+            call_id.suffix = find_first_marker(diff.suffix);
+        }
+    } else if (func_name_in_suffix != std::string::npos && func_name_in_prefix == std::string::npos) {
+        // Function name is only in suffix - call_id is PRE_FUNC_NAME
+        call_id.pos = call_id_position::PRE_FUNC_NAME;
+
+        // call_id_prefix: last marker in diff.prefix
+        call_id.prefix = find_last_marker(diff.prefix);
+
+        // call_id_suffix: first marker in the portion of diff.suffix before func_name
+        std::string before_func = diff.suffix.substr(0, func_name_in_suffix);
+        call_id.suffix = find_first_marker(before_func);
+    }
+
+    if (call_id.prefix == arguments.end) {
+        call_id.prefix = "";
+    }
+
+    if (call_id.suffix == arguments.start) {
+        call_id.suffix = "";
+    }
+
+    // When call_id is detected, per_call_end may have been incorrectly set to include
+    // the call_id_suffix and sample args. Clear it if it starts with call_id_suffix.
+    if (call_id.pos != call_id_position::NONE && !call_id.suffix.empty() &&
+        format.per_call_end.find(call_id.suffix) == 0) {
+        format.per_call_end.clear();
+    }
+}
+
+}  // namespace autoparser
diff --git a/common/chat-parser-xml-toolcall.cpp b/common/chat-parser-xml-toolcall.cpp
deleted file mode 100644
index f2304e9cb1..0000000000
--- a/common/chat-parser-xml-toolcall.cpp
+++ /dev/null
@@ -1,919 +0,0 @@
-#include "chat.h"
-#include "chat-parser.h"
-#include "common.h"
-#include "json-partial.h"
-#include "json-schema-to-grammar.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-class xml_toolcall_syntax_exception : public std::runtime_error {
-  public:
-    xml_toolcall_syntax_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-template<typename T>
-inline void sort_uniq(std::vector<T> &vec) {
-    std::sort(vec.begin(), vec.end());
-    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
-}
-
-template<typename T>
-inline bool all_space(const T &str) {
-    return std::all_of(str.begin(), str.end(), [](unsigned char ch) { return std::isspace(ch); });
-}
-
-static size_t utf8_truncate_safe(const std::string_view s) {
-    size_t len = s.size();
-    if (len == 0) return 0;
-    size_t i = len;
-    for (size_t back = 0; back < 4 && i > 0; ++back) {
-        --i;
-        unsigned char c = s[i];
-        if ((c & 0x80) == 0) {
-            return len;
-        } else if ((c & 0xC0) == 0xC0) {
-            size_t expected_len = 0;
-            if ((c & 0xE0) == 0xC0) expected_len = 2;
-            else if ((c & 0xF0) == 0xE0) expected_len = 3;
-            else if ((c & 0xF8) == 0xF0) expected_len = 4;
-            else return i;
-            if (len - i >= expected_len) {
-                return len;
-            } else {
-                return i;
-            }
-        }
-    }
-    return len - std::min(len, size_t(3));
-}
-
-inline void utf8_truncate_safe_resize(std::string &s) {
-    s.resize(utf8_truncate_safe(s));
-}
-
-inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
-    return s.substr(0, utf8_truncate_safe(s));
-}
-
-static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
-    if (literal1.size() == 0) return builder.try_find_literal(literal2);
-    const auto saved_pos = builder.pos();
-    while (auto res = builder.try_find_literal(literal1)) {
-        builder.consume_spaces();
-        const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
-        if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
-            if (res->prelude.size() != res->groups[0].begin - saved_pos) {
-                res->prelude = builder.str({saved_pos, res->groups[0].begin});
-            }
-            builder.move_to(builder.pos() + match_len);
-            res->groups[0].end = builder.pos();
-            GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
-            return res;
-        }
-        builder.move_to(res->groups[0].begin + 1);
-    }
-    builder.move_to(saved_pos);
-    return std::nullopt;
-}
-
-/**
- * make a GBNF that accept any strings except those containing any of the forbidden strings.
- */
-std::string make_gbnf_excluding(std::vector<std::string> forbids) {
-    constexpr auto charclass_escape = [](unsigned char c) -> std::string {
-        if (c == '\\' || c == ']' || c == '^' || c == '-') {
-            std::string s = "\\";
-            s.push_back((char)c);
-            return s;
-        }
-        if (isprint(c)) {
-            return std::string(1, (char)c);
-        }
-        char buf[16];
-        snprintf(buf, 15, "\\x%02X", c);
-        return std::string(buf);
-    };
-    constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
-        std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
-        int i = l;
-        while (i < r) {
-            const std::string &s = forbids[i];
-            if ((int)s.size() == depth) {
-                ++i;
-                continue;
-            }
-            unsigned char c = (unsigned char)s[depth];
-            int j = i;
-            while (j < r && (int)forbids[j].size() > depth &&
-                   (unsigned char)forbids[j][depth] == c) {
-                ++j;
-            }
-            children.push_back({c, {i, j}});
-            i = j;
-        }
-        std::vector<std::string> alts;
-        if (!children.empty()) {
-            std::string cls;
-            for (auto &ch : children) cls += charclass_escape(ch.first);
-            alts.push_back(std::string("[^") + cls + "]");
-        }
-        for (auto &ch : children) {
-            std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
-            if (!childExpr.empty()) {
-                std::string quoted_ch = "\"";
-                if (ch.first == '\\') quoted_ch += "\\\\";
-                else if (ch.first == '"') quoted_ch += "\\\"";
-                else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
-                else {
-                    char buf[16];
-                    snprintf(buf, 15, "\\x%02X", ch.first);
-                    quoted_ch += buf;
-                }
-                quoted_ch += "\"";
-                std::string branch = quoted_ch + std::string(" ") + childExpr;
-                alts.push_back(branch);
-            }
-        }
-        if (alts.empty()) return "";
-        std::ostringstream oss;
-        oss << "( ";
-        for (size_t k = 0; k < alts.size(); ++k) {
-            if (k) oss << " | ";
-            oss << alts[k];
-        }
-        oss << " )";
-        return oss.str();
-    };
-    if (forbids.empty()) return "( . )*";
-    sort(forbids.begin(), forbids.end());
-    std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
-    if (expr.empty()) {
-        std::string cls;
-        for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
-        expr = std::string("( [^") + cls + "] )";
-    }
-    if (forbids.size() == 1)
-        return expr + "*";
-    else
-        return std::string("( ") + expr + " )*";
-}
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.tool_sep.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    std::string key_val_sep = form.key_val_sep;
-    if (form.key_val_sep2) {
-        key_val_sep += "\n";
-        key_val_sep += *form.key_val_sep2;
-    }
-    GGML_ASSERT(!key_val_sep.empty());
-
-    if (tools.is_array() && !tools.empty()) {
-        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
-            auto string_arg_val = form.last_val_end ?
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
-                    builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));
-
-            std::vector<std::string> tool_rules;
-            for (const auto & tool : tools) {
-                if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
-                    LOG("Skipping tool without function: %s", tool.dump(2).c_str());
-                    continue;
-                }
-                const auto & function = tool.at("function");
-                if (!function.contains("name") || !function.at("name").is_string()) {
-                    LOG("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
-                    continue;
-                }
-                if (!function.contains("parameters") || !function.at("parameters").is_object()) {
-                    LOG("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
-                    continue;
-                }
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                struct parameter_rule {
-                    std::string symbol_name;
-                    bool is_required;
-                };
-                std::vector<parameter_rule> arg_rules;
-                if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
-                    LOG("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
-                    continue;
-                } else {
-                    std::vector<std::string> requiredParameters;
-                    if (parameters.contains("required")) {
-                        try { parameters.at("required").get_to(requiredParameters); }
-                        catch (const std::runtime_error&) {
-                            LOG("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
-                        }
-                    }
-                    sort_uniq(requiredParameters);
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        std::string quoted_key = key;
-                        bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
-                        if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
-                            quoted_key = gbnf_format_literal(key);
-                            quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
-                        }
-                        arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
-                            gbnf_format_literal(form.key_start) + " " +
-                            gbnf_format_literal(quoted_key) + " " +
-                            gbnf_format_literal(key_val_sep) + " " +
-                            ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
-                                    (form.raw_argval ?
-                                            string_arg_val :
-                                            "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
-                                    ) :
-                                    builder.add_schema(name + "-arg-" + key, value)
-                            )
-                        ), required});
-                    }
-                }
-
-                auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
-                decltype(next_arg_with_sep) next_arg = "\"\"";
-                if (form.relax_arg) {
-                    if (!arg_rules.empty()) {
-                        std::vector<std::string> arg_symbols;
-                        arg_symbols.reserve(arg_rules.size());
-                        for (const auto & rule : arg_rules) {
-                            arg_symbols.push_back(rule.symbol_name);
-                        }
-                        auto any_arg = builder.add_rule(name + "-any-arg", string_join(arg_symbols, " | "));
-                        auto any_arg_with_end = builder.add_rule(name + "-any-arg-with-end", any_arg + " " + next_arg_with_sep);
-                        next_arg = builder.add_rule(name + "-args-relaxed", "( " + any_arg_with_end + " )*");
-                    }
-                } else {
-                    for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
-                        std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
-                        next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
-                                include_this_arg : "( " + include_this_arg + " ) | " + next_arg
-                        );
-                        include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
-                        next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
-                                include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
-                        );
-                    }
-                }
-
-                std::string quoted_name = name;
-                if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
-                    quoted_name = gbnf_format_literal(name);
-                    quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
-                }
-                quoted_name = gbnf_format_literal(quoted_name);
-                // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-                if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
-                    quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
-                }
-                // MiroThinker uses {{ name_part_1 }}</server_name>\n<tool_name>{{ name_part_2 }} as function name
-                if (data.format == COMMON_CHAT_FORMAT_MIROTHINKER) {
-                    auto server_split_pos = name.find("_");
-                    if (std::string::npos == server_split_pos) {
-                        quoted_name = "\"system_default</server_name>\\n<tool_name>\" " + quoted_name;
-                    } else {
-                        quoted_name = gbnf_format_literal(name.substr(0, server_split_pos)) +
-                                      " \"</server_name>\\n<tool_name>\" " +
-                                      gbnf_format_literal(name.substr(server_split_pos + 1));
-                    }
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                        gbnf_format_literal(form.tool_start) + " " +
-                        quoted_name + " " +
-                        gbnf_format_literal(form.tool_sep) + " " +
-                        next_arg
-                ));
-            }
-
-            auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
-            auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
-            auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
-            auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
-            builder.add_rule("root",
-                (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
-                tool_call_multiple_with_end  + "?" +
-                (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
-            );
-        });
-
-        // grammar trigger for tool call
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
-    }
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
-    GGML_ASSERT(!form.tool_start.empty());
-    GGML_ASSERT(!form.key_start.empty());
-    GGML_ASSERT(!form.key_val_sep.empty());
-    GGML_ASSERT(!form.val_end.empty());
-    GGML_ASSERT(!form.tool_end.empty());
-
-    // Helper to choose return false or throw error
-    constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
-        LOG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
-        if (recovery) {
-            builder.move_to(start_pos);
-            return false;
-        } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
-    };
-    // Drop substring from needle to end from a JSON
-    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
-        auto pos = json_str.rfind(needle);
-        if (pos == std::string::npos) {
-            return false;
-        }
-        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
-            unsigned char ch = static_cast<unsigned char>(json_str[i]);
-            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
-                return false;
-            }
-        }
-        if (pos != 0 && json_str[pos - 1] == '"') {
-            --pos;
-        }
-        json_str.resize(pos);
-        return true;
-    };
-    // Helper to generate a partial argument JSON
-    constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
-        auto rest = builder.consume_rest();
-        utf8_truncate_safe_resize(rest);
-        set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
-        auto tool_str = arguments.dump();
-        if (partial_json(tool_str)) {
-            if (builder.add_tool_call(function_name, "", tool_str)) {
-                return;
-            }
-        }
-        LOG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
-    };
-    // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
-    constexpr auto try_find_close = [](
-            common_chat_msg_parser & builder,
-            const std::string & end,
-            const std::optional<std::string> & alt_end,
-            const std::string & end_next,
-            const std::optional<std::string> & alt_end_next
-    ) {
-        auto saved_pos = builder.pos();
-        auto tc = builder.try_find_literal(end);
-        auto val_end_size = end.size();
-        if (alt_end) {
-            auto pos_1 = builder.pos();
-            builder.move_to(saved_pos);
-            auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
-            if (alt_end_next) {
-                builder.move_to(saved_pos);
-                auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
-                if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
-                    tc2 = tc3;
-                }
-            }
-            if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
-                tc = tc2;
-                tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
-                builder.move_to(tc->groups[0].end);
-                val_end_size = alt_end->size();
-            } else {
-                builder.move_to(pos_1);
-            }
-        }
-        return std::make_pair(val_end_size, tc);
-    };
-    // Helper to find a val_end or last_val_end, returns matched pattern size
-    const auto try_find_val_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
-    };
-    // Helper to find a tool_end or last_tool_end, returns matched pattern size
-    const auto try_find_tool_end = [try_find_close, &builder, &form]() {
-        return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
-    };
-
-    bool recovery = true;
-    const auto start_pos = builder.pos();
-    if (!all_space(form.scope_start)) {
-        if (auto tc = builder.try_find_literal(form.scope_start)) {
-            if (all_space(tc->prelude)) {
-                if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
-                    throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
-            } else {
-                builder.move_to(start_pos);
-                return false;
-            }
-        } else return false;
-    }
-    while (auto tc = builder.try_find_literal(form.tool_start)) {
-        if (!all_space(tc->prelude)) {
-            LOG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                    gbnf_format_literal(form.tool_start).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            builder.move_to(tc->groups[0].begin - tc->prelude.size());
-            break;
-        }
-
-        // Find tool name
-        auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
-        if (!func_name) {
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-        if (!func_name) {
-            // Partial tool name not supported
-            throw common_chat_msg_partial_exception("incomplete tool_call");
-        }
-        // If the model generate multiple tool call and the first tool call has no argument
-        if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
-            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
-            auto [sz, tc] = try_find_tool_end();
-            func_name = tc;
-        }
-        // Skip when tool_sep may be partial
-        if (builder.pos() == builder.input().size()) {
-            throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
-        }
-
-        // Parse tool name
-        builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
-        std::string function_name = string_strip(func_name->prelude);
-        // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
-        if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
-            if (string_starts_with(function_name, "functions.")) {
-                static const std::regex re(":\\d+$");
-                if (std::regex_search(function_name, re)) {
-                    function_name = function_name.substr(10, function_name.rfind(":") - 10);
-                }
-            }
-        }
-        // MiroThinker uses {{ name_part_1 }}</server_name>\n<tool_name>{{ name_part_2 }} as function name
-        if (builder.syntax().format == COMMON_CHAT_FORMAT_MIROTHINKER) {
-            if (string_starts_with(function_name, "system_default</server_name>\n<tool_name>")) {
-                function_name = function_name.substr(14 + 26);
-            } else {
-                auto server_split_pos = function_name.find("</server_name>\n<tool_name>");
-                if (std::string::npos != server_split_pos) {
-                    function_name = function_name.substr(0, server_split_pos) + "_" + function_name.substr(server_split_pos + 26);
-                }
-            }
-        }
-
-        // Argument JSON
-        json arguments = json::object();
-
-        // Helper to generate a partial argument JSON
-        const auto gen_partial_args = [&](auto set_partial_arg) {
-            gen_partial_json(set_partial_arg, arguments, builder, function_name);
-        };
-
-        // Parse all arg_key/arg_value pairs
-        while (auto tc = builder.try_find_literal(form.key_start)) {
-            if (!all_space(tc->prelude)) {
-                LOG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
-                        gbnf_format_literal(form.key_start).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                builder.move_to(tc->groups[0].begin - tc->prelude.size());
-                break;
-            }
-            if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
-                auto tool_call_arg = arguments.dump();
-                if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-                    tool_call_arg.resize(tool_call_arg.size() - 1);
-                }
-                builder.add_tool_call(function_name, "", tool_call_arg);
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
-            }
-
-            // Parse arg_key
-            auto key_res = builder.try_find_literal(form.key_val_sep);
-            if (!key_res) {
-                gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
-                throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
-            }
-            if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
-                gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
-                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
-            }
-            auto &key = key_res->prelude;
-            recovery = false;
-
-            // Parse arg_value
-            if (form.key_val_sep2) {
-                if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
-                    if (!all_space(tc->prelude)) {
-                        LOG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
-                                gbnf_format_literal(tc->prelude).c_str(),
-                                gbnf_format_literal(form.key_val_sep).c_str(),
-                                gbnf_format_literal(*form.key_val_sep2).c_str()
-                        );
-                        return return_error(builder, start_pos, false);
-                    }
-                    if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
-                    }
-                } else {
-                    gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
-                }
-            }
-            auto val_start = builder.pos();
-
-            // Test if arg_val is a partial JSON
-            std::optional<common_json> value_json = std::nullopt;
-            if (!form.raw_argval || !*form.raw_argval) {
-                try { value_json = builder.try_consume_json(); }
-                catch (const std::runtime_error&) { builder.move_to(val_start); }
-                // TODO: Delete this when json_partial adds top-level support for null/true/false
-                if (builder.pos() == val_start) {
-                    const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
-                    builder.consume_spaces();
-                    std::string_view sv = utf8_truncate_safe_view(builder.input());
-                    sv.remove_prefix(builder.pos());
-                    std::string rest = "a";
-                    if (sv.size() < 6) rest = sv;
-                    if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
-                        value_json = {123, {"123", "123"}};
-                        builder.consume_rest();
-                    } else {
-                        builder.move_to(val_start);
-                    }
-                }
-            }
-
-            // If it is a JSON and followed by </arg_value>, parse as json
-            // cannot support streaming because it may be a plain text starting with JSON
-            if (value_json) {
-                auto json_end = builder.pos();
-                builder.consume_spaces();
-                if (builder.pos() == builder.input().size()) {
-                    if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
-                        arguments[key] = value_json->json;
-                        auto json_str = arguments.dump();
-                        if (!value_json->healing_marker.json_dump_marker.empty()) {
-                            GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
-                            json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
-                        } else {
-                            GGML_ASSERT(json_str.back() == '}');
-                            json_str.resize(json_str.size() - 1);
-                        }
-                        builder.add_tool_call(function_name, "", json_str);
-                    } else {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                    }
-                    LOG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
-                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
-                }
-                builder.move_to(json_end);
-                auto [val_end_size, tc] = try_find_val_end();
-                if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
-                    if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
-                        LOG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
-                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
-                    } else arguments[key] = value_json->json;
-                } else builder.move_to(val_start);
-            }
-
-            // If not, parse as plain text
-            if (val_start == builder.pos()) {
-                if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
-                    auto &value_str = value_plain->prelude;
-                    if (form.trim_raw_argval) value_str = string_strip(value_str);
-                    if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
-                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
-                        throw common_chat_msg_partial_exception(
-                                "Expected " + gbnf_format_literal(form.val_end) +
-                                " after " + gbnf_format_literal(form.key_val_sep) +
-                                (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                        );
-                    }
-                    arguments[key] = value_str;
-                } else {
-                    if (form.trim_raw_argval) {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
-                    } else {
-                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
-                    }
-                    throw common_chat_msg_partial_exception(
-                            "Expected " + gbnf_format_literal(form.val_end) +
-                            " after " + gbnf_format_literal(form.key_val_sep) +
-                            (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
-                    );
-                }
-            }
-        }
-
-        // Consume closing tag
-        if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
-            if (!all_space(tc->prelude)) {
-                LOG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                        gbnf_format_literal(form.tool_end).c_str(),
-                        gbnf_format_literal(tc->prelude).c_str()
-                );
-                return return_error(builder, start_pos, recovery);
-            }
-            if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
-                // Add the parsed tool call
-                if (!builder.add_tool_call(function_name, "", arguments.dump())) {
-                    throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
-                }
-                recovery = false;
-                continue;
-            }
-        }
-
-        auto tool_call_arg = arguments.dump();
-        if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
-            tool_call_arg.resize(tool_call_arg.size() - 1);
-        }
-        builder.add_tool_call(function_name, "", tool_call_arg);
-        throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
-    }
-    if (auto tc = builder.try_find_literal(form.scope_end)) {
-        if (!all_space(tc->prelude)) {
-            LOG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                    gbnf_format_literal(form.scope_end).c_str(),
-                    gbnf_format_literal(tc->prelude).c_str()
-            );
-            return return_error(builder, start_pos, recovery);
-        }
-    } else {
-        if (all_space(form.scope_end)) return true;
-        builder.consume_spaces();
-        if (builder.pos() == builder.input().size())
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        LOG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
-                gbnf_format_literal(form.scope_end).c_str(),
-                gbnf_format_literal(builder.consume_rest()).c_str()
-        );
-        return return_error(builder, start_pos, recovery);
-    }
-
-    return true;
-}
-
-/**
- * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
- * May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
- * form.scope_start, form.tool_sep and form.scope_end can be empty.
- */
-bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
-    auto pos = pos_;
-    auto tsize = result_.tool_calls.size();
-    try { return parse_xml_tool_calls(*this, form); }
-    catch (const xml_toolcall_syntax_exception&) {}
-    move_to(pos);
-    result_.tool_calls.resize(tsize);
-    return false;
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
-    constexpr auto rstrip = [](std::string &s) {
-        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
-    };
-    // Erase substring from l to r, along with additional spaces nearby
-    constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
-        while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
-        ++l;
-        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
-        if (l < r) str[l] = '\n';
-        if (l + 1 < r) str[l + 1] = '\n';
-        if (l != 0) l += 2;
-        str.erase(l, r - l);
-        return l;
-    };
-    constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
-        auto best_match = content.size();
-        for (auto pattern: list) {
-            if (pattern.size() == 0) continue;
-            for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
-                auto match_len = content.size() - match_idx;
-                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
-                    best_match = match_idx;
-                }
-            }
-        }
-        if (content.size() > best_match) {
-            content.erase(best_match);
-        }
-    };
-    const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
-        return trim_suffix(content, {
-            start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
-            form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
-            form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
-            form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
-            form.scope_end
-        });
-    };
-
-
-    // Trim leading spaces without affecting keyword matching
-    static const common_regex spaces_regex("\\s*");
-    {
-        auto tc = builder.consume_regex(spaces_regex);
-        auto spaces = builder.str(tc.groups[0]);
-        auto s1 = spaces.size();
-        trim_potential_partial_word(spaces);
-        auto s2 = spaces.size();
-        builder.move_to(builder.pos() - (s1 - s2));
-    }
-
-    // Parse content
-    bool reasoning_unclosed = builder.syntax().thinking_forced_open;
-    std::string unclosed_reasoning_content("");
-    for (;;) {
-        auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
-        std::string content;
-        std::string tool_call_start;
-
-        if (tc) {
-            content = std::move(tc->prelude);
-            tool_call_start = builder.str(tc->groups[0]);
-            LOG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
-        } else {
-            content = builder.consume_rest();
-            utf8_truncate_safe_resize(content);
-        }
-
-        // Handle unclosed think block
-        if (reasoning_unclosed) {
-            if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
-                unclosed_reasoning_content += content;
-                if (!(form.allow_toolcall_in_think && tc)) {
-                    unclosed_reasoning_content += tool_call_start;
-                    continue;
-                }
-            } else {
-                reasoning_unclosed = false;
-                std::string reasoning_content;
-                if (pos == std::string::npos) {
-                    reasoning_content = std::move(content);
-                } else {
-                    reasoning_content = content.substr(0, pos);
-                    content.erase(0, pos + end_think.size());
-                }
-                if (builder.pos() == builder.input().size() && all_space(content)) {
-                    rstrip(reasoning_content);
-                    trim_potential_partial_word(reasoning_content);
-                    rstrip(reasoning_content);
-                    if (reasoning_content.empty()) {
-                        rstrip(unclosed_reasoning_content);
-                        trim_potential_partial_word(unclosed_reasoning_content);
-                        rstrip(unclosed_reasoning_content);
-                        if (unclosed_reasoning_content.empty()) continue;
-                    }
-                }
-                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                    builder.add_content(start_think);
-                    builder.add_content(unclosed_reasoning_content);
-                    builder.add_content(reasoning_content);
-                    if (builder.pos() != builder.input().size() || !all_space(content))
-                        builder.add_content(end_think);
-                } else {
-                    builder.add_reasoning_content(unclosed_reasoning_content);
-                    builder.add_reasoning_content(reasoning_content);
-                }
-                unclosed_reasoning_content.clear();
-            }
-        }
-
-        // Handle multiple think block
-        bool toolcall_in_think = false;
-        for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
-            if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
-                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                    auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
-                    builder.add_reasoning_content(reasoning_content);
-                    think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
-                } else {
-                    think_start = think_end + end_think.size() - 1;
-                }
-            } else {
-                // This <tool_call> start is in thinking block, skip this tool call
-                // This <tool_call> start is in thinking block
-                if (form.allow_toolcall_in_think) {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size());
-                } else {
-                    unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
-                }
-                reasoning_unclosed = true;
-                content.resize(think_start);
-                toolcall_in_think = true;
-            }
-        }
-
-        if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-            rstrip(content);
-            // Handle unclosed </think> token from content: delete all </think> token
-            if (auto pos = content.rfind(end_think); pos != std::string::npos) {
-                while (pos != std::string::npos) {
-                    pos = erase_spaces(content, pos, pos + end_think.size() - 1);
-                    pos = content.rfind(end_think, pos);
-                }
-            }
-            // Strip if needed
-            if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
-                content = string_strip(content);
-            }
-        }
-
-        // remove potential partial suffix
-        if (builder.pos() == builder.input().size() && builder.is_partial()) {
-            if (unclosed_reasoning_content.empty()) {
-                rstrip(content);
-                trim_potential_partial_word(content);
-                rstrip(content);
-            } else {
-                rstrip(unclosed_reasoning_content);
-                trim_potential_partial_word(unclosed_reasoning_content);
-                rstrip(unclosed_reasoning_content);
-            }
-        }
-
-        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
-        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
-                builder.add_reasoning_content(unclosed_reasoning_content);
-            } else {
-                if (content.empty()) {
-                    content = start_think + unclosed_reasoning_content;
-                } else {
-                    content += "\n\n" + start_think;
-                    content += unclosed_reasoning_content;
-                }
-            }
-            unclosed_reasoning_content.clear();
-        }
-
-        // Add content
-        if (!content.empty()) {
-            // If there are multiple content blocks
-            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
-                builder.add_content("\n\n");
-            }
-            builder.add_content(content);
-        }
-
-        // This <tool_call> start is in thinking block and toolcall_in_think not set, skip this tool call
-        if (toolcall_in_think && !form.allow_toolcall_in_think) {
-            continue;
-        }
-
-        // There is no tool call and all content is parsed
-        if (!tc) {
-            GGML_ASSERT(builder.pos() == builder.input().size());
-            GGML_ASSERT(unclosed_reasoning_content.empty());
-            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
-            break;
-        }
-
-        builder.move_to(tc->groups[0].begin);
-        if (builder.try_consume_xml_tool_calls(form)) {
-            auto end_of_tool = builder.pos();
-            builder.consume_spaces();
-            if (builder.pos() != builder.input().size()) {
-                builder.move_to(end_of_tool);
-                if (!builder.result().content.empty()) {
-                    builder.add_content("\n\n");
-                }
-            }
-        } else {
-            static const common_regex next_char_regex(".");
-            auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
-            rstrip(c);
-            builder.add_content(c);
-        }
-    }
-}
-
-/**
- * Parse content uses reasoning and XML-Style tool call
- * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
- */
-void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
-    parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
-}
diff --git a/common/chat-parser-xml-toolcall.h b/common/chat-parser-xml-toolcall.h
deleted file mode 100644
index fed28d8695..0000000000
--- a/common/chat-parser-xml-toolcall.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#pragma once
-
-#include "chat.h"
-
-#include <nlohmann/json.hpp>
-
-#include <optional>
-#include <string>
-#include <vector>
-
-
-// Sample config:
-// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
-// GLM 4.5   (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
-struct xml_tool_call_format {
-    std::string scope_start; // <minimax:tool_call>\n  // \n                      // can be empty
-    std::string tool_start;  // <invoke name=\"        // <tool_call>
-    std::string tool_sep;    // \">\n                  // \n                      // can be empty only for parse_xml_tool_calls
-    std::string key_start;   // <parameter name=\"     // <arg_key>
-    std::string key_val_sep; // \">                    // </arg_key>\n<arg_value>
-    std::string val_end;     // </parameter>\n         // </arg_value>\n
-    std::string tool_end;    // </invoke>\n            // </tool_call>\n
-    std::string scope_end;   // </minimax:tool_call>   //                         // can be empty
-    // Set this if there can be dynamic spaces inside key_val_sep.
-    // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
-    std::optional<std::string> key_val_sep2 = std::nullopt;
-    // Set true if argval should only be raw string. e.g. Hello "world" hi
-    // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
-    // Defaults to std::nullopt, both will be allowed.
-    std::optional<bool> raw_argval = std::nullopt;
-    std::optional<std::string> last_val_end = std::nullopt;
-    std::optional<std::string> last_tool_end = std::nullopt;
-    bool trim_raw_argval = false;
-    bool allow_toolcall_in_think = false;
-    // Set true to allows function arguments in arbitrary order and without
-    // enforcing required field.
-    bool relax_arg = false;
-};
-
-// make a GBNF that accept any strings except those containing any of the forbidden strings.
-std::string make_gbnf_excluding(std::vector<std::string> forbids);
-
-/**
- * Build grammar for xml-style tool call
- * form.scope_start and form.scope_end can be empty.
- * Requires data.format for model-specific hacks.
- */
-void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
diff --git a/common/chat-parser.cpp b/common/chat-parser.cpp
deleted file mode 100644
index e03be67003..0000000000
--- a/common/chat-parser.cpp
+++ /dev/null
@@ -1,1567 +0,0 @@
-#include "chat-parser.h"
-#include "chat-peg-parser.h"
-#include "common.h"
-#include "log.h"
-#include "peg-parser.h"
-#include "regex-partial.h"
-
-#include <algorithm>
-#include <cctype>
-#include <optional>
-#include <stdexcept>
-#include <string>
-#include <string_view>
-#include <vector>
-
-using json = nlohmann::ordered_json;
-
-static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
-                                                const common_regex &     prefix,
-                                                size_t                   rstrip_prefix = 0) {
-    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
-    if (auto res = builder.try_find_regex(prefix)) {
-        builder.move_back(rstrip_prefix);
-        auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
-        if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call array");
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
-    std::string arguments;
-    if (builder.is_partial()) {
-        arguments = (json{
-                         { "code", code + builder.healing_marker() }
-        })
-                        .dump();
-        auto idx = arguments.find(builder.healing_marker());
-        if (idx != std::string::npos) {
-            arguments.resize(idx);
-        }
-    } else {
-        arguments = (json{
-                         { "code", code }
-        })
-                        .dump();
-    }
-    return arguments;
-}
-
-/**
- * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
- * Aggregates the prefix, suffix and in-between text into the content.
- */
-static void parse_json_tool_calls(
-    common_chat_msg_parser &            builder,
-    const std::optional<common_regex> & block_open,
-    const std::optional<common_regex> & function_regex_start_only,
-    const std::optional<common_regex> & function_regex,
-    const common_regex &                close_regex,
-    const std::optional<common_regex> & block_close,
-    bool                                allow_raw_python = false,
-    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
-        nullptr) {
-    auto parse_tool_calls = [&]() {
-        size_t from  = std::string::npos;
-        auto   first = true;
-        while (true) {
-            auto start_pos = builder.pos();
-            auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
-                       function_regex                     ? builder.try_find_regex(*function_regex, from) :
-                                                            std::nullopt;
-
-            if (res) {
-                std::string name;
-                if (get_function_name) {
-                    name = get_function_name(*res);
-                } else {
-                    GGML_ASSERT(res->groups.size() == 2);
-                    name = builder.str(res->groups[1]);
-                }
-                first = false;
-                if (name.empty()) {
-                    // get_function_name signalled us that we should skip this match and treat it as content.
-                    from = res->groups[0].begin + 1;
-                    continue;
-                }
-                from = std::string::npos;
-
-                auto maybe_raw_python = name == "python" && allow_raw_python;
-                if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
-                    if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
-                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
-                            throw common_chat_msg_partial_exception("incomplete tool call");
-                        }
-                        builder.consume_regex(close_regex);
-                    }
-                    continue;
-                }
-                if (maybe_raw_python) {
-                    auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-                    if (!builder.add_tool_call(name, "", arguments)) {
-                        throw common_chat_msg_partial_exception("incomplete tool call");
-                    }
-                    return;
-                }
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            } else {
-                builder.move_to(start_pos);
-            }
-            break;
-        }
-        if (block_close) {
-            builder.consume_regex(*block_close);
-        }
-        builder.consume_spaces();
-        builder.add_content(builder.consume_rest());
-    };
-    if (block_open) {
-        if (auto res = builder.try_find_regex(*block_open)) {
-            parse_tool_calls();
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-    } else {
-        parse_tool_calls();
-    }
-}
-
-common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax)
-    : input_(input), is_partial_(is_partial), syntax_(syntax)
-{
-    result_.role = "assistant";
-
-    while (true) {
-        std::string id = std::to_string(std::rand());
-        if (input.find(id) == std::string::npos) {
-            healing_marker_ = id;
-            break;
-        }
-    }
-}
-
-std::string common_chat_msg_parser::str(const common_string_range & rng) const {
-    GGML_ASSERT(rng.begin <= rng.end);
-    return input_.substr(rng.begin, rng.end - rng.begin);
-}
-
-void common_chat_msg_parser::add_content(const std::string &content) {
-    result_.content += content;
-}
-
-void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
-    result_.reasoning_content += reasoning_content;
-}
-
-bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
-    if (name.empty()) {
-        return false;
-    }
-
-    common_chat_tool_call tool_call;
-    tool_call.name = name;
-    tool_call.arguments = arguments;
-    tool_call.id = id;
-
-    // LOG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
-    result_.tool_calls.emplace_back(tool_call);
-
-    return true;
-}
-bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
-    std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
-    std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
-    std::string arguments = "";
-    if (tool_call.contains("arguments")) {
-        if (tool_call.at("arguments").is_object()) {
-            arguments = tool_call.at("arguments").dump();
-        } else {
-            arguments = tool_call.at("arguments");
-        }
-    }
-
-    return add_tool_call(name, id, arguments);
-}
-
-bool common_chat_msg_parser::add_tool_calls(const json & arr) {
-    for (const auto & item : arr) {
-        if (!add_tool_call(item)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool common_chat_msg_parser::add_tool_call_short_form(const json& tool_call) {
-    if (!tool_call.is_object() || tool_call.size() != 1) {
-        return false;
-    }
-
-    // Get the tool name (the single key in the object)
-    auto it = tool_call.begin();
-    std::string name = it.key();
-
-    if (name.empty()) {
-        return false;
-    }
-
-    // Get the arguments (the nested object)
-    const json& args_json = it.value();
-    std::string arguments = "";
-
-    if (args_json.is_object()) {
-        arguments = args_json.dump();
-    }
-    else if (args_json.is_string()) {
-        arguments = args_json;
-    }
-    else if (!args_json.is_null()) {
-        // For other types, convert to string representation
-        arguments = args_json.dump();
-    }
-
-    return add_tool_call(name, "", arguments);
-}
-
-void common_chat_msg_parser::finish() {
-    if (!is_partial_ && pos_ != input_.size()) {
-        throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
-    }
-}
-
-bool common_chat_msg_parser::consume_spaces() {
-    const auto length = input_.size();
-    auto consumed = false;
-    while (pos_ < length && std::isspace(input_[pos_])) {
-        ++pos_;
-        consumed = true;
-    }
-    return consumed;
-}
-
-bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
-    auto pos = pos_;
-    for (auto i = 0u; i < literal.size(); ++i) {
-        if (pos >= input_.size()) {
-            return false;
-        }
-        if (input_[pos] != literal[i]) {
-            return false;
-        }
-        ++pos;
-    }
-    pos_ = pos;
-    return true;
-}
-
-std::optional<common_chat_msg_parser::find_regex_result>  common_chat_msg_parser::try_find_literal(const std::string & literal) {
-    auto idx = input_.find(literal, pos_);
-    if (idx != std::string::npos) {
-        find_regex_result res;
-        res.prelude = input_.substr(pos_, idx - pos_);
-        auto end = idx + literal.size();
-        res.groups.emplace_back(common_string_range{idx, end});
-        move_to(end);
-        return res;
-    }
-    if (is_partial_) {
-        idx = string_find_partial_stop(input_, literal);
-        if (idx != std::string::npos && idx >= pos_) {
-            find_regex_result res;
-            res.prelude = input_.substr(pos_, idx - pos_);
-            auto end = input_.size();
-            res.groups.emplace_back(common_string_range{idx, end});
-            move_to(end);
-            return res;
-        }
-    }
-    return std::nullopt;
-}
-
-void common_chat_msg_parser::consume_literal(const std::string & literal) {
-    if (!try_consume_literal(literal)) {
-        throw common_chat_msg_partial_exception(literal);
-    }
-}
-
-bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
-    std::string pending_reasoning_prefix;
-
-    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-        return false;
-    }
-
-    auto set_reasoning_prefix = [&](size_t prefix_pos) {
-        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
-            return;
-        }
-        if (prefix_pos + start_think.size() > input_.size()) {
-            pending_reasoning_prefix.clear();
-            return;
-        }
-        // Capture the exact literal that opened the reasoning section so we can
-        // surface it back to callers. This ensures formats that force the
-        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
-        // instead of dropping it during parsing.
-        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
-    };
-
-    auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
-        auto stripped_reasoning = string_strip(reasoning);
-        if (stripped_reasoning.empty()) {
-            return;
-        }
-        if (syntax_.reasoning_in_content) {
-            add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
-            add_content(stripped_reasoning);
-            if (closed) {
-                add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
-            }
-        } else {
-            if (!pending_reasoning_prefix.empty()) {
-                add_reasoning_content(pending_reasoning_prefix);
-                pending_reasoning_prefix.clear();
-            }
-            add_reasoning_content(stripped_reasoning);
-        }
-    };
-
-    const size_t saved_pos = pos_;
-    const size_t saved_content_size = result_.content.size();
-    const size_t saved_reasoning_size = result_.reasoning_content.size();
-
-    auto restore_state = [&]() {
-        move_to(saved_pos);
-        result_.content.resize(saved_content_size);
-        result_.reasoning_content.resize(saved_reasoning_size);
-    };
-
-    // Allow leading whitespace to be preserved as content when reasoning is present at the start
-    size_t cursor = pos_;
-    size_t whitespace_end = cursor;
-    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
-        ++whitespace_end;
-    }
-
-    if (whitespace_end >= input_.size()) {
-        restore_state();
-        if (syntax_.thinking_forced_open) {
-            auto rest = input_.substr(saved_pos);
-            if (!rest.empty()) {
-                handle_reasoning(rest, /* closed */ !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-        return false;
-    }
-
-    cursor = whitespace_end;
-    const size_t remaining = input_.size() - cursor;
-    const size_t start_prefix = std::min(start_think.size(), remaining);
-    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;
-
-    if (has_start_tag && start_prefix < start_think.size()) {
-        move_to(input_.size());
-        return true;
-    }
-
-    if (has_start_tag) {
-        if (whitespace_end > pos_) {
-            add_content(input_.substr(pos_, whitespace_end - pos_));
-        }
-        set_reasoning_prefix(cursor);
-        cursor += start_think.size();
-    } else if (syntax_.thinking_forced_open) {
-        cursor = whitespace_end;
-    } else {
-        restore_state();
-        return false;
-    }
-    while (true) {
-        if (cursor >= input_.size()) {
-            move_to(input_.size());
-            return true;
-        }
-
-        size_t end_pos = input_.find(end_think, cursor);
-        if (end_pos == std::string::npos) {
-            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
-            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
-            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
-            if (reasoning_end > cursor) {
-                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
-            }
-            move_to(input_.size());
-            return true;
-        }
-
-        if (end_pos > cursor) {
-            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
-        } else {
-            handle_reasoning("", /* closed */ true);
-        }
-
-        cursor = end_pos + end_think.size();
-
-        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
-            ++cursor;
-        }
-
-        const size_t next_remaining = input_.size() - cursor;
-        if (next_remaining == 0) {
-            move_to(cursor);
-            return true;
-        }
-
-        const size_t next_prefix = std::min(start_think.size(), next_remaining);
-        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
-            if (next_prefix < start_think.size()) {
-                move_to(input_.size());
-                return true;
-            }
-            set_reasoning_prefix(cursor);
-            cursor += start_think.size();
-            continue;
-        }
-
-        move_to(cursor);
-        return true;
-    }
-}
-
-std::string common_chat_msg_parser::consume_rest() {
-    auto rest = input_.substr(pos_);
-    pos_ = input_.size();
-    return rest;
-}
-
-// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
-    auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
-    pos_ = m.groups[0].end;
-
-    if (add_prelude_to_content) {
-        add_content(prelude);
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    return find_regex_result{prelude, m.groups};
-}
-
-common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
-    if (auto result = try_consume_regex(regex)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception(regex.str());
-}
-
-std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
-    auto m = regex.search(input_, pos_);
-    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
-        return std::nullopt;
-    }
-    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
-        if (is_partial()) {
-            throw common_chat_msg_partial_exception(regex.str());
-        }
-        return std::nullopt;
-    }
-    if (m.groups[0].begin != pos_) {
-        // Didn't match at the current position.
-        return std::nullopt;
-    }
-    pos_ = m.groups[0].end;
-
-    return find_regex_result {
-        /* .prelude = */ "",
-        m.groups,
-    };
-}
-
-std::optional<common_json> common_chat_msg_parser::try_consume_json() {
-    auto it = input_.cbegin() + pos_;
-    const auto end = input_.cend();
-    common_json result;
-    if (!common_json_parse(it, end, healing_marker_, result)) {
-        return std::nullopt;
-    }
-    pos_ = std::distance(input_.cbegin(), it);
-    if (result.healing_marker.marker.empty()) {
-        // No healing marker, just return the parsed json
-        return result;
-    }
-    if (!is_partial()) {
-        throw common_chat_msg_partial_exception("JSON");
-    }
-    return result;
-}
-
-common_json common_chat_msg_parser::consume_json() {
-    if (auto result = try_consume_json()) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    if (auto result = try_consume_json_with_dumped_args(args_paths, content_paths)) {
-        return *result;
-    }
-    throw common_chat_msg_partial_exception("JSON");
-}
-
-std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
-    const std::vector<std::vector<std::string>> & args_paths,
-    const std::vector<std::vector<std::string>> & content_paths
-) {
-    auto partial = try_consume_json();
-    if (!partial) {
-        return std::nullopt;
-    }
-    auto is_arguments_path = [&](const std::vector<std::string> & path) {
-        return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
-    };
-    auto is_content_path = [&](const std::vector<std::string> & path) {
-        return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
-    };
-
-    if (partial->healing_marker.marker.empty()) {
-        if (args_paths.empty()) {
-            // No arguments to dump, and JSON was parsed fully.
-            return consume_json_result {
-                partial->json,
-                /* .is_partial = */ false,
-            };
-        }
-        if (is_arguments_path({})) {
-            // Entire JSON is the arguments and was parsed fully.
-            return consume_json_result {
-                partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
-                /* .is_partial = */ false,
-            };
-        }
-    }
-
-    LOG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-
-    auto found_healing_marker = false;
-    std::vector<std::string> path;
-    std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
-        if (is_arguments_path(path)) {
-            auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
-            if (is_partial() && !partial->healing_marker.marker.empty()) {
-                auto idx = arguments.find(partial->healing_marker.json_dump_marker);
-                if (idx != std::string::npos) {
-                    arguments.resize(idx);
-                    found_healing_marker = true;
-                }
-                if (arguments == "\"") {
-                    // This happens because of completing `:"$magic` after `"arguments"`
-                    arguments = "";
-                }
-            }
-            return arguments;
-        }
-        if (is_content_path(path)) {
-            if (!j.is_string()) {
-                throw std::runtime_error("Content path must be a string");
-            }
-            std::string str = j;
-            auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
-            if (idx != std::string::npos) {
-                str.resize(idx);
-                found_healing_marker = true;
-            }
-            return str;
-        }
-        if (j.is_object()) {
-            auto obj = json::object();
-            for (const auto & p : j.items()) {
-                const auto & key = p.key();
-                const auto & value = p.value();
-                const std::string key_str = key; // NOLINT
-                auto idx = key_str.find(healing_marker_);
-                if (idx != std::string::npos) {
-                    found_healing_marker = true;
-                    break;
-                }
-                path.push_back(key_str);
-                if (value.is_string()) {
-                    const std::string value_str = value;
-                    if (value_str.find(healing_marker_) != std::string::npos) {
-                        found_healing_marker = true;
-                        if (is_content_path(path)) {
-                            if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
-                                // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
-                                obj[key] = remove_unsupported_healings_and_dump_args(value);
-                            }
-                        }
-                        break;
-                    }
-                    obj[key] = value;
-                } else {
-                    obj[key] = remove_unsupported_healings_and_dump_args(value);
-                }
-                path.pop_back();
-            }
-            return obj;
-        }
-        if (j.is_array()) {
-            auto arr = json::array();
-            for (const auto & value : j) {
-                if (value.is_string()) {
-                    std::string str = value;
-                    auto idx = str.find(healing_marker_);
-                    if (idx != std::string::npos) {
-                        // Don't heal array values that aren't in the arguments.
-                        found_healing_marker = true;
-                        break;
-                    }
-                }
-                arr.push_back(remove_unsupported_healings_and_dump_args(value));
-            }
-            return arr;
-        }
-        return j;
-    };
-
-    auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
-    LOG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
-    return consume_json_result {
-        cleaned,
-        /* .is_partial = */ found_healing_marker,
-    };
-}
-
-void common_chat_msg_parser::clear_tools() {
-    result_.tool_calls.clear();
-}
-
-/**
- * All common_chat_parse_* moved from chat.cpp to chat-parser.cpp below
- * to reduce incremental compile time for parser changes.
- */
-static void common_chat_parse_generic(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const std::vector<std::vector<std::string>> content_paths = {
-        {"response"},
-    };
-    static const std::vector<std::vector<std::string>> args_paths = {
-        {"tool_call", "arguments"},
-        {"tool_calls", "arguments"},
-    };
-    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
-    if (data.value.contains("tool_calls")) {
-        if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool calls");
-        }
-    } else if (data.value.contains("tool_call")) {
-        if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-    } else if (data.value.contains("response")) {
-        const auto & response = data.value.at("response");
-        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
-        if (data.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete response");
-        }
-    } else {
-        throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
-    }
-}
-
-static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("[THINK]", "[/THINK]");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
-    parse_prefixed_json_tool_call_array(builder, prefix);
-}
-
-static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-    if (auto res = builder.try_find_regex(start_action_regex)) {
-        // If we didn't extract thoughts, prelude includes them.
-        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
-        for (const auto & tool_call : tool_calls.value) {
-            std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-            std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-        if (tool_calls.is_partial) {
-            throw common_chat_msg_partial_exception("incomplete tool call");
-        }
-        builder.consume_regex(end_action_regex);
-    } else if (auto res = builder.try_find_regex(start_response_regex)) {
-        if (!builder.try_find_regex(end_response_regex)) {
-            builder.add_content(builder.consume_rest());
-            throw common_chat_msg_partial_exception(end_response_regex.str());
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex function_regex(
-        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
-    static const common_regex close_regex("\\}\\s*");
-
-    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
-    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");
-
-    if (with_builtin_tools) {
-        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
-        if (auto res = builder.try_find_regex(builtin_call_regex)) {
-            auto fun_res = builder.consume_regex(function_name_regex);
-            auto function_name = builder.str(fun_res.groups[1]);
-
-            common_healing_marker healing_marker;
-            json args = json::object();
-            while (true) {
-                if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
-                    auto arg_name = builder.str(arg_res->groups[1]);
-                    auto partial = builder.consume_json();
-                    args[arg_name] = partial.json;
-                    healing_marker.marker = partial.healing_marker.marker;
-                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
-                    builder.consume_spaces();
-                    if (!builder.try_consume_literal(",")) {
-                        break;
-                    }
-                } else {
-                    break;
-                }
-            }
-            builder.consume_literal(")");
-            builder.consume_spaces();
-
-            auto arguments = args.dump();
-            if (!builder.add_tool_call(function_name, "", arguments)) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            return;
-        }
-    }
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ function_regex,
-        /* function_regex= */ std::nullopt,
-        close_regex,
-        std::nullopt);
-
-}
-
-static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?function<｜tool▁sep｜>([^\n]+)\n```json\n");
-    static const common_regex close_regex("```[\\s\\r\\n]*<｜tool▁call▁end｜>");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
-    static const common_regex function_regex("(?:<｜tool▁call▁begin｜>)?([^\\n<]+)(?:<｜tool▁sep｜>)");
-
-    static const common_regex close_regex("(?:[\\s]*)?<｜tool▁call▁end｜>");
-    static const common_regex tool_calls_begin("(?:<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)");
-    static const common_regex tool_calls_end("<｜tool▁calls▁end｜>");
-
-    if (!builder.syntax().parse_tool_calls) {
-        LOG_DBG("%s: not parse_tool_calls\n", __func__);
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    LOG_DBG("%s: parse_tool_calls\n", __func__);
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ tool_calls_begin,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        tool_calls_end);
-}
-
-static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
-    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
-    // First try to parse using the standard reasoning parsing method
-    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
-
-    auto start_pos = builder.pos();
-    auto found_end_think = builder.try_find_literal("</think>");
-    builder.move_to(start_pos);
-
-    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
-        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
-        // If reasoning was parsed successfully, the remaining content is regular content
-        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
-        // </think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>NAME\n```json\nJSON\n```<｜tool▁call▁end｜><｜tool▁calls▁end｜>
-        common_chat_parse_deepseek_v3_1_content(builder);
-    } else {
-        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
-          LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
-          common_chat_parse_deepseek_v3_1_content(builder);
-          return;
-        }
-        // If no reasoning tags found, check if we should treat everything as reasoning
-        if (builder.syntax().thinking_forced_open) {
-            // If thinking is forced open but no tags found, treat everything as reasoning
-            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
-            builder.add_reasoning_content(builder.consume_rest());
-        } else {
-            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
-            // <｜tool▁call▁begin｜>NAME<｜tool▁sep｜>JSON<｜tool▁call▁end｜>
-            common_chat_parse_deepseek_v3_1_content(builder);
-        }
-    }
-}
-
-static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</invoke>",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_qwen3_coder_xml(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form{};
-        form.scope_start = "<tool_call>";
-        form.tool_start = "<function=";
-        form.tool_sep = ">";
-        form.key_start = "<parameter=";
-        form.key_val_sep = ">";
-        form.val_end = "</parameter>";
-        form.tool_end = "</function>";
-        form.scope_end = "</tool_call>";
-        form.trim_raw_argval = true;
-        return form;
-        })();
-        builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\":";
-        form.val_end     = ",";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.allow_toolcall_in_think = true;
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
-}
-
-static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_mirothinker(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<use_mcp_tool>";
-        form.tool_start  = "<server_name>";
-        form.tool_sep    = "</tool_name>\n<arguments>\n{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\":";
-        form.val_end     = ",";
-       form.tool_end    = "}\n</arguments>";
-        form.scope_end   = "</use_mcp_tool>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    builder.consume_reasoning_with_xml_tool_calls(form);
-}
-
-static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
-    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
-    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
-
-    static const common_regex start_regex("<\\|start\\|>assistant");
-    static const common_regex analysis_regex("<\\|channel\\|>analysis");
-    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
-    static const common_regex preamble_regex("<\\|channel\\|>commentary");
-    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
-    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
-
-    auto consume_end = [&](bool include_end = false) {
-        if (auto res = builder.try_find_literal("<|end|>")) {
-            return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
-        }
-        return builder.consume_rest();
-    };
-
-    auto handle_tool_call = [&](const std::string & name) {
-        if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
-            if (builder.syntax().parse_tool_calls) {
-                if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            } else if (args->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    };
-
-    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
-        auto match = regex.search(input, 0, true);
-        if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
-            return match;
-        }
-        return std::nullopt;
-    };
-
-    do {
-        auto header_start_pos = builder.pos();
-        auto content_start = builder.try_find_literal("<|message|>");
-        if (!content_start) {
-            throw common_chat_msg_partial_exception("incomplete header");
-        }
-
-        auto header = content_start->prelude;
-
-        if (auto match = regex_match(tool_call1_regex, header)) {
-            auto group = match->groups[1];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (auto match = regex_match(tool_call2_regex, header)) {
-            auto group = match->groups[2];
-            auto name = header.substr(group.begin, group.end - group.begin);
-            handle_tool_call(name);
-            continue;
-        }
-
-        if (regex_match(analysis_regex, header)) {
-            builder.move_to(header_start_pos);
-            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
-                builder.add_content(consume_end(true));
-            } else {
-                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
-            }
-            continue;
-        }
-
-        if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
-            builder.add_content(consume_end());
-            continue;
-        }
-
-        // Possibly a malformed message, attempt to recover by rolling
-        // back to pick up the next <|start|>
-        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
-        builder.move_to(header_start_pos);
-    } while (builder.try_find_regex(start_regex, std::string::npos, false));
-
-    auto remaining = builder.consume_rest();
-    if (!remaining.empty()) {
-        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
-    }
-}
-
-static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start  = */ "",
-        /* form.tool_start   = */ "<tool_call>",
-        /* form.tool_sep     = */ "",
-        /* form.key_start    = */ "<arg_key>",
-        /* form.key_val_sep  = */ "</arg_key>",
-        /* form.val_end      = */ "</arg_value>",
-        /* form.tool_end     = */ "</tool_call>",
-        /* form.scope_end    = */ "",
-        /* form.key_val_sep2 = */ "<arg_value>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
-}
-
-static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    static const common_regex prefix(regex_escape(" functools["));
-    parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
-}
-
-static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
-    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
-    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
-    static const common_regex close_regex(R"(\s*)");
-
-    parse_json_tool_calls(
-        builder,
-        std::nullopt,
-        function_regex_start_only,
-        function_regex,
-        close_regex,
-        std::nullopt,
-        /* allow_raw_python= */ true,
-        /* get_function_name= */ [&](const auto & res) -> std::string {
-            auto at_start = res.groups[0].begin == 0;
-            auto name = builder.str(res.groups[1]);
-            if (!name.empty() && name.back() == '{') {
-                // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
-                builder.move_back(1);
-            }
-            auto idx = name.find_last_not_of("\n{");
-            name = name.substr(0, idx + 1);
-            if (at_start && name == "all") {
-                return "";
-            }
-            return name;
-        });
-}
-
-static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
-    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));
-
-    static const common_regex function_regex(R"(<function=(\w+)>)");
-    static const common_regex close_regex(R"(</function>)");
-
-    parse_json_tool_calls(
-        builder,
-        /* block_open= */ std::nullopt,
-        /* function_regex_start_only= */ std::nullopt,
-        function_regex,
-        close_regex,
-        std::nullopt);
-
-    if (auto res = builder.try_find_regex(python_tag_regex)) {
-        auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
-        builder.add_tool_call("python", "", arguments);
-        return;
-    }
-}
-
-static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    static const common_regex open_regex(
-        "(?:"
-            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
-            "("                          // match 2 (open_tag)
-                "<tool_call>"
-                "|<function_call>"
-                "|<tool>"
-                "|<tools>"
-                "|<response>"
-                "|<json>"
-                "|<xml>"
-                "|<JSON>"
-            ")?"
-            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
-        ")"
-        "|<function=([^>]+)>"            // match 4 (function name)
-        "|<function name=\"([^\"]+)\">"  // match 5 (function name again)
-    );
-
-    while (auto res = builder.try_find_regex(open_regex)) {
-        const auto & block_start = res->groups[1];
-        std::string block_end = block_start.empty() ? "" : "```";
-
-        const auto & open_tag = res->groups[2];
-        std::string close_tag;
-
-        if (!res->groups[3].empty()) {
-            builder.move_to(res->groups[3].begin);
-            close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);
-
-            if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
-                if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            } else {
-                throw common_chat_msg_partial_exception("failed to parse tool call");
-            }
-        } else {
-            auto function_name = builder.str(res->groups[4]);
-            if (function_name.empty()) {
-                function_name = builder.str(res->groups[5]);
-            }
-            GGML_ASSERT(!function_name.empty());
-
-            close_tag = "</function>";
-
-            if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
-                if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-                builder.consume_spaces();
-                builder.consume_literal(close_tag);
-                builder.consume_spaces();
-                if (!block_end.empty()) {
-                    builder.consume_literal(block_end);
-                    builder.consume_spaces();
-                }
-            }
-        }
-    }
-
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_granite(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    static const common_regex start_think_regex(regex_escape("<think>"));
-    static const common_regex end_think_regex(regex_escape("</think>"));
-    // Granite models output partial tokens such as "<" and "<think".
-    // By leveraging try_consume_regex()/try_find_regex() throwing
-    // common_chat_msg_partial_exception for these partial tokens,
-    // processing is interrupted and the tokens are not passed to add_content().
-    if (auto res = builder.try_consume_regex(start_think_regex)) {
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-        builder.try_find_regex(end_think_regex, std::string::npos, false);
-        // Restore position for try_parse_reasoning()
-        builder.move_to(res->groups[0].begin);
-    }
-    builder.try_parse_reasoning("<think>", "</think>");
-
-    // Parse response tags
-    static const common_regex start_response_regex(regex_escape("<response>"));
-    static const common_regex end_response_regex(regex_escape("</response>"));
-    // Granite models output partial tokens such as "<" and "<response".
-    // Same hack as reasoning parsing.
-    if (builder.try_consume_regex(start_response_regex)) {
-        builder.try_find_regex(end_response_regex);
-    }
-
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
-            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-        }
-    } else {
-        builder.add_content(builder.consume_rest());
-    }
-}
-
-static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<think>", "</think>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        // Expect JSON array of tool calls
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            if (!builder.try_consume_literal("</TOOLCALL>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            builder.add_tool_calls(tool_calls_data.json);
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
-    // Parse thinking tags
-    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // Look for tool calls
-    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
-    if (auto res = builder.try_find_regex(tool_call_regex)) {
-        builder.move_to(res->groups[0].end);
-
-        auto tool_calls_data = builder.consume_json();
-        if (tool_calls_data.json.is_array()) {
-            builder.consume_spaces();
-            if (!builder.try_consume_literal("<|tools_suffix|>")) {
-                throw common_chat_msg_partial_exception("Incomplete tool call");
-            }
-            for (const auto & value : tool_calls_data.json) {
-                if (value.is_object()) {
-                    builder.add_tool_call_short_form(value);
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Incomplete tool call");
-        }
-    }
-    builder.add_content(builder.consume_rest());
-}
-
-
-static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
-    }
-
-    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
-    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
-    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));
-
-    // Loop through all tool calls
-    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
-        builder.move_to(res->groups[0].end);
-
-        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
-        auto tool_calls_data = builder.consume_json();
-
-        // Consume end marker
-        builder.consume_spaces();
-        if (!builder.try_consume_regex(tool_call_end_regex)) {
-            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
-        }
-
-        // Process each tool call in the array
-        if (tool_calls_data.json.is_array()) {
-            for (const auto & tool_call : tool_calls_data.json) {
-                if (!tool_call.is_object()) {
-                    throw common_chat_msg_partial_exception("Tool call must be an object");
-                }
-
-                if (!tool_call.contains("name")) {
-                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
-                }
-
-                std::string function_name = tool_call.at("name");
-                std::string arguments = "{}";
-
-                if (tool_call.contains("arguments")) {
-                    if (tool_call.at("arguments").is_object()) {
-                        arguments = tool_call.at("arguments").dump();
-                    } else if (tool_call.at("arguments").is_string()) {
-                        arguments = tool_call.at("arguments");
-                    }
-                }
-
-                if (!builder.add_tool_call(function_name, "", arguments)) {
-                    throw common_chat_msg_partial_exception("Incomplete tool call");
-                }
-            }
-        } else {
-            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
-        }
-
-        // Consume any trailing whitespace after this tool call
-        builder.consume_spaces();
-    }
-
-    // Consume any remaining content after all tool calls
-    auto remaining = builder.consume_rest();
-    if (!string_strip(remaining).empty()) {
-        builder.add_content(remaining);
-    }
-}
-
-static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<seed:tool_call>",
-        /* form.tool_start  = */ "<function=",
-        /* form.tool_sep    = */ ">",
-        /* form.key_start   = */ "<parameter=",
-        /* form.key_val_sep = */ ">",
-        /* form.val_end     = */ "</parameter>",
-        /* form.tool_end    = */ "</function>",
-        /* form.scope_end   = */ "</seed:tool_call>",
-    };
-    builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
-}
-
-static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
-    builder.try_parse_reasoning("<think>", "</think>");
-    builder.add_content(builder.consume_rest());
-}
-
-static void common_chat_parse(common_chat_msg_parser & builder) {
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());
-
-    switch (builder.syntax().format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
-            common_chat_parse_content_only(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GENERIC:
-            common_chat_parse_generic(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
-            common_chat_parse_mistral_nemo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MAGISTRAL:
-            common_chat_parse_magistral(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X:
-            common_chat_parse_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
-            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
-            common_chat_parse_deepseek_r1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
-            common_chat_parse_deepseek_v3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
-            common_chat_parse_functionary_v3_2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
-            common_chat_parse_functionary_v3_1_llama_3_1(builder);
-            break;
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
-            common_chat_parse_hermes_2_pro(builder);
-            break;
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
-            common_chat_parse_firefunction_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_COMMAND_R7B:
-            common_chat_parse_command_r7b(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GRANITE:
-            common_chat_parse_granite(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GPT_OSS:
-            common_chat_parse_gpt_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_SEED_OSS:
-            common_chat_parse_seed_oss(builder);
-            break;
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
-            common_chat_parse_nemotron_v2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APERTUS:
-            common_chat_parse_apertus(builder);
-            break;
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
-            common_chat_parse_lfm2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MINIMAX_M2:
-            common_chat_parse_minimax_m2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_GLM_4_5:
-            common_chat_parse_glm_4_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_KIMI_K2:
-            common_chat_parse_kimi_k2(builder);
-            break;
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML:
-            common_chat_parse_qwen3_coder_xml(builder);
-            break;
-        case COMMON_CHAT_FORMAT_APRIEL_1_5:
-            common_chat_parse_apriel_1_5(builder);
-            break;
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
-            common_chat_parse_xiaomi_mimo(builder);
-            break;
-        case COMMON_CHAT_FORMAT_MIROTHINKER:
-            common_chat_parse_mirothinker(builder);
-            break;
-        default:
-            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
-    }
-    builder.finish();
-}
-
-common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
-        syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
-    }
-    common_chat_msg_parser builder(input, is_partial, syntax);
-    try {
-        common_chat_parse(builder);
-    } catch (const common_chat_msg_partial_exception & ex) {
-        LOG_DBG("Partial parse: %s\n", ex.what());
-        if (!is_partial) {
-            builder.clear_tools();
-            builder.move_to(0);
-            common_chat_parse_content_only(builder);
-        }
-    }
-    auto msg = builder.result();
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
-
-common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax) {
-    if (parser.empty()) {
-        throw std::runtime_error("Failed to parse due to missing parser definition.");
-    }
-
-    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());
-
-    common_peg_parse_context ctx(input, is_partial);
-    auto result = parser.parse(ctx);
-    if (result.fail()) {
-        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
-    }
-
-    common_chat_msg msg;
-    msg.role = "assistant";
-
-    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
-        auto mapper = common_chat_peg_native_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
-        auto mapper = common_chat_peg_constructed_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    } else {
-        // Generic mapper
-        auto mapper = common_chat_peg_mapper(msg);
-        mapper.from_ast(ctx.ast, result);
-    }
-    if (!is_partial) {
-        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
-    }
-    return msg;
-}
diff --git a/common/chat-parser.h b/common/chat-parser.h
deleted file mode 100644
index cb70d16e01..0000000000
--- a/common/chat-parser.h
+++ /dev/null
@@ -1,131 +0,0 @@
-#pragma once
-
-#include "chat.h"
-#include "chat-parser-xml-toolcall.h"
-#include "json-partial.h"
-#include "regex-partial.h"
-
-#include <optional>
-#include <string>
-#include <vector>
-
-class common_chat_msg_partial_exception : public std::runtime_error {
-  public:
-    common_chat_msg_partial_exception(const std::string & message) : std::runtime_error(message) {}
-};
-
-class common_chat_msg_parser {
-    std::string input_;
-    bool is_partial_;
-    common_chat_syntax syntax_;
-    std::string healing_marker_;
-
-    size_t pos_ = 0;
-    common_chat_msg result_;
-
-  public:
-    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
-    const std::string & input() const { return input_; }
-    size_t pos() const { return pos_; }
-    const std::string & healing_marker() const { return healing_marker_; }
-    const bool & is_partial() const { return is_partial_; }
-    const common_chat_msg & result() const { return result_; }
-    const common_chat_syntax & syntax() const { return syntax_; }
-
-    void move_to(size_t pos) {
-        if (pos > input_.size()) {
-            throw std::runtime_error("Invalid position!");
-        }
-        pos_ = pos;
-    }
-    void move_back(size_t n) {
-        if (pos_ < n) {
-            throw std::runtime_error("Can't move back that far!");
-        }
-        pos_ -= n;
-    }
-
-    // Get the substring of the input at the given range
-    std::string str(const common_string_range & rng) const;
-
-    // Appends to the result.content field
-    void add_content(const std::string & content);
-
-    // Appends to the result.reasoning_content field
-    void add_reasoning_content(const std::string & reasoning_content);
-
-    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
-    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);
-
-    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
-    bool add_tool_call(const nlohmann::ordered_json & tool_call);
-
-    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
-    bool add_tool_calls(const nlohmann::ordered_json & arr);
-
-    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
-    bool add_tool_call_short_form(const nlohmann::ordered_json& tool_call);
-
-    void finish();
-
-    bool consume_spaces();
-
-    void consume_literal(const std::string & literal);
-
-    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);
-
-    std::string consume_rest();
-
-    struct find_regex_result {
-        std::string prelude;
-        std::vector<common_string_range> groups;
-    };
-
-    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);
-
-    bool try_consume_literal(const std::string & literal);
-
-    std::optional<find_regex_result> try_find_literal(const std::string & literal);
-
-    find_regex_result consume_regex(const common_regex & regex);
-
-    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);
-
-    std::optional<common_json> try_consume_json();
-    common_json consume_json();
-
-    struct consume_json_result {
-        nlohmann::ordered_json value;
-        bool is_partial;
-    };
-
-    /*
-        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.
-
-        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
-        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`
-
-        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
-        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
-        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
-    */
-    consume_json_result consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-    std::optional<consume_json_result> try_consume_json_with_dumped_args(
-        const std::vector<std::vector<std::string>> & args_paths = {},
-        const std::vector<std::vector<std::string>> & content_paths = {}
-    );
-
-    /**
-     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
-     * form.scope_start, form.tool_sep and form.scope_end can be empty.
-     */
-    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);
-
-    // Parse content uses reasoning and XML-Style tool call
-    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");
-
-    void clear_tools();
-};
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
index 1bcba9cd86..624dee22fb 100644
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -1,13 +1,17 @@
 #include "chat-peg-parser.h"
 
+#include "chat-auto-parser.h"
+#include "ggml.h"
+#include "peg-parser.h"
+
 #include <nlohmann/json.hpp>
 
-using json = nlohmann::json;
+using ordered_json = nlohmann::ordered_json;
 
 static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     int count = 0;
     while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
-        if (max != -1 && count <= max) {
+        if (max != -1 && count >= max) {
             break;
         }
         sv.remove_suffix(1);
@@ -16,109 +20,988 @@ static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
     return sv;
 }
 
-void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+static std::string_view trim_leading_space(std::string_view sv, int max = -1) {
+    int count = 0;
+    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front()))) {
+        if (max != -1 && count >= max) {
+            break;
+        }
+        sv.remove_prefix(1);
+        count++;
+    }
+    return sv;
+}
+
+static std::string_view trim(std::string_view sv) {
+    return trim_trailing_space(trim_leading_space(sv, 1));
+}
+
+// Count the number of unclosed '{' braces in a JSON-like string,
+// properly skipping braces inside quoted strings.
+static int json_brace_depth(const std::string & s) {
+    int  depth     = 0;
+    bool in_string = false;
+    bool escaped   = false;
+    for (char c : s) {
+        if (escaped) {
+            escaped = false;
+            continue;
+        }
+        if (c == '\\' && in_string) {
+            escaped = true;
+            continue;
+        }
+        if (c == '"') {
+            in_string = !in_string;
+            continue;
+        }
+        if (!in_string) {
+            if (c == '{') {
+                depth++;
+            } else if (c == '}') {
+                depth--;
+            }
+        }
+    }
+    return depth;
+}
+
+// JSON-escape a string and return the inner content (without surrounding quotes).
+static std::string escape_json_string_inner(const std::string & s) {
+    std::string escaped = ordered_json(s).dump();
+    if (escaped.size() >= 2 && escaped.front() == '"' && escaped.back() == '"') {
+        return escaped.substr(1, escaped.size() - 2);
+    }
+    return escaped;
+}
+
+// Convert Python-style single-quoted strings to JSON double-quoted strings
+// Only converts outer string delimiters, properly handling escape sequences:
+// - {'key': 'value'} -> {"key": "value"}
+// - {'code': 'print(\'hello\')'} -> {"code": "print('hello')"}
+// - {'msg': 'He said "hi"'} -> {"msg": "He said \"hi\""}
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);  // May need extra space for escaping
+
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        // Handle escape sequences
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                // Inside a single-quoted string being converted to double quotes
+                if (next == '\'') {
+                    // \' -> ' (escaped single quote becomes unescaped in double-quoted string)
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    // \" stays as \" (already escaped, works in double-quoted string)
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                // Other escapes (\n, \\, etc.): pass through both characters
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                // Inside a double-quoted string - pass through escape sequences as-is
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            // Outside any string - just pass through the backslash
+            result += c;
+            continue;
+        }
+
+        // Handle quote characters
+        if (c == '"') {
+            if (in_single_quoted) {
+                // Unescaped double quote inside single-quoted string -> must escape for JSON
+                result += "\\\"";
+            } else {
+                // Double quote as string delimiter or outside strings
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                // Single quote inside double-quoted string -> pass through
+                result += c;
+            } else if (in_single_quoted) {
+                // Closing single quote -> convert to double quote
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                // Opening single quote -> convert to double quote
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
+void tag_based_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
     arena.visit(result, [this](const common_peg_ast_node & node) {
-        map(node);
+        if (!node.tag.empty()) {
+            tags[node.tag] = std::string(node.text);
+        }
     });
 }
 
+tagged_parse_result tagged_peg_parser::parse_and_extract(const std::string & input, common_peg_parse_flags extra_flags) const {
+    common_peg_parse_context ctx(input, flags | extra_flags);
+    auto parse_result = arena.parse(ctx);
+
+    tag_based_peg_mapper mapper;
+    mapper.from_ast(ctx.ast, parse_result);
+
+    return { std::move(parse_result), std::move(mapper.tags) };
+}
+
+tagged_parse_result tagged_peg_parser::parse_anywhere_and_extract(const std::string & input) const {
+    if (input.empty()) {
+        return parse_and_extract(input);
+    }
+    for (size_t i = 0; i < input.size(); i++) {
+        common_peg_parse_context ctx(input, flags);
+        auto parse_result = arena.parse(ctx, i);
+        if (parse_result.success() || i == input.size() - 1) {
+            tag_based_peg_mapper mapper;
+            mapper.from_ast(ctx.ast, parse_result);
+            return { std::move(parse_result), std::move(mapper.tags) };
+        }
+    }
+    GGML_ABORT("Should not happen");
+}
+
+tagged_peg_parser build_tagged_peg_parser(
+    const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn) {
+    common_peg_parser_builder builder;
+    builder.set_root(fn(builder));
+    return { builder.build() };
+}
+
+common_peg_parser common_chat_peg_builder::tag_with_safe_content(const std::string &       tag_name,
+                                                                 const std::string &       marker,
+                                                                 const common_peg_parser & p) {
+    if (marker.empty()) {
+        return zero_or_more(choice({ p, rule(tag_name, content(any())) }));
+    }
+    auto content_chunk = rule(tag_name, content(negate(literal(marker)) + any() + until(marker)));
+    return zero_or_more(choice({ p, content_chunk }));
+}
+
+std::string & common_chat_peg_mapper::args_target() {
+    return (current_tool && !current_tool->name.empty()) ? current_tool->arguments : args_buffer;
+}
+
+std::string common_chat_peg_mapper::normalize_container_value(const std::string & input) {
+    return normalize_quotes_to_json(input);
+}
+
+void common_chat_peg_mapper::from_ast(const common_peg_ast_arena &    arena,
+                                      const common_peg_parse_result & parse_result_arg) {
+    arena.visit(parse_result_arg, [this](const common_peg_ast_node & node) { map(node); });
+    // Flush any pending tool call that was started but never got a name
+    // This happens during partial parsing when the tool call is incomplete
+    if (pending_tool_call.has_value() && !pending_tool_call->name.empty()) {
+        if (!args_buffer.empty()) {
+            pending_tool_call->arguments = args_buffer;
+        }
+        if (closing_quote_pending && !pending_tool_call->arguments.empty()) {
+            pending_tool_call->arguments += "\"";
+        }
+        result.tool_calls.push_back(pending_tool_call.value());
+        pending_tool_call.reset();
+    }
+
+    // Discard whitespace-only reasoning content (e.g. from <think></think> prefill)
+    if (!result.reasoning_content.empty()) {
+        bool all_whitespace = true;
+        for (char c : result.reasoning_content) {
+            if (c != ' ' && c != '\n' && c != '\r' && c != '\t') {
+                all_whitespace = false;
+                break;
+            }
+        }
+        if (all_whitespace) {
+            result.reasoning_content.clear();
+        }
+    }
+}
+
 void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
+    // Handle reasoning/content tags
     bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
-    bool is_content = node.tag == common_chat_peg_builder::CONTENT;
+    bool is_content   = node.tag == common_chat_peg_builder::CONTENT;
 
-    if (is_reasoning) {
-        result.reasoning_content = std::string(trim_trailing_space(node.text));
+    if (is_reasoning) { // GPT OSS can have more than 1 reasoning block, so concatenate here
+        result.reasoning_content += std::string(node.text);
     }
 
     if (is_content) {
-        result.content = std::string(trim_trailing_space(node.text));
+        // Concatenate content from multiple content nodes (e.g., when reasoning markers
+        // are preserved before content markers in reasoning_format=NONE mode)
+        result.content += std::string(node.text);
     }
-}
-
-void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
 
-    bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
-    bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
-    bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
+    // Handle tool-related tags (supporting both JSON and tagged formats)
+    bool is_tool_open  = node.tag == common_chat_peg_builder::TOOL_OPEN;
+    bool is_tool_close = node.tag == common_chat_peg_builder::TOOL_CLOSE;
+    bool is_tool_name  = node.tag == common_chat_peg_builder::TOOL_NAME;
+    bool is_tool_id    = node.tag == common_chat_peg_builder::TOOL_ID;
+    bool is_tool_args  = node.tag == common_chat_peg_builder::TOOL_ARGS;
+    bool is_arg_open   = node.tag == common_chat_peg_builder::TOOL_ARG_OPEN;
+    bool is_arg_close  = node.tag == common_chat_peg_builder::TOOL_ARG_CLOSE;
+    bool is_arg_name         = node.tag == common_chat_peg_builder::TOOL_ARG_NAME;
+    bool is_arg_value        = node.tag == common_chat_peg_builder::TOOL_ARG_VALUE;
+    bool is_arg_string_value = node.tag == common_chat_peg_builder::TOOL_ARG_STRING_VALUE;
 
     if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
+        pending_tool_call     = common_chat_tool_call();
+        current_tool          = &pending_tool_call.value();
+        arg_count             = 0;
+        args_buffer.clear();
+        closing_quote_pending = false;
     }
 
     if (is_tool_id && current_tool) {
-        current_tool->id = std::string(trim_trailing_space(node.text));
+        auto text = trim_trailing_space(node.text);
+        if (text.size() >= 2 && text.front() == '"' && text.back() == '"') {
+            text = text.substr(1, text.size() - 2);
+        }
+        current_tool->id = std::string(text);
     }
 
     if (is_tool_name && current_tool) {
         current_tool->name = std::string(trim_trailing_space(node.text));
+        // Now that we have the name, populate the arguments from the buffer
+        if (!args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        } else if (current_tool->arguments.empty()) {
+            current_tool->arguments = "{";
+        }
+        // Add the tool call to results so streaming can see it
+        if (pending_tool_call.has_value()) {
+            result.tool_calls.push_back(pending_tool_call.value());
+            pending_tool_call.reset();
+            current_tool = &result.tool_calls.back();
+        }
     }
 
     if (is_tool_args && current_tool) {
-        current_tool->arguments = std::string(trim_trailing_space(node.text));
+        // For JSON format: arguments come as a complete JSON object
+        // For tagged format: built up from individual arg_name/arg_value nodes
+        auto text = trim_trailing_space(node.text);
+        if (!text.empty() && text.front() == '{') {
+            args_target() = std::string(text);
+        }
+    }
+
+    if (is_arg_open) {
+        closing_quote_pending = false;
+    }
+
+    if (is_arg_name && current_tool) {
+        std::string arg_entry;
+        if (arg_count > 0) {
+            arg_entry = ",";
+        }
+        arg_entry += ordered_json(trim(node.text)).dump() + ":";
+        ++arg_count;
+
+        auto & target = args_target();
+        if (target.empty()) {
+            target = "{";
+        }
+        target += arg_entry;
+    }
+
+    if ((is_arg_value || is_arg_string_value) && current_tool) {
+        std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+
+        std::string value_to_add;
+        if (value_content.empty() && is_arg_string_value) {
+            // Empty string value - arg_close will add the closing quote
+            value_to_add          = "\"";
+            closing_quote_pending = true;
+        } else if (!value_content.empty() && is_arg_string_value) {
+            // Schema declares this as string type - always treat as literal string value
+            if (!closing_quote_pending) {
+                value_to_add          = "\"";
+                closing_quote_pending = true;
+            }
+            value_to_add += escape_json_string_inner(value_content);
+        } else if (!value_content.empty()) {
+            // For potential containers, normalize Python-style single quotes to JSON double quotes
+            bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
+            if (is_potential_container) {
+                value_content = normalize_container_value(value_content);
+            }
+
+            // Try to parse as JSON value (number, bool, null, object, array)
+            try {
+                ordered_json parsed = ordered_json::parse(value_content);
+                if (parsed.is_string()) {
+                    // Don't add closing quote yet (added by arg_close) for monotonic streaming
+                    std::string escaped = parsed.dump();
+                    if (!escaped.empty() && escaped.back() == '"') {
+                        escaped.pop_back();
+                    }
+                    value_to_add          = escaped;
+                    closing_quote_pending = true;
+                } else {
+                    // Non-string values: use raw content to preserve whitespace for monotonicity
+                    value_to_add = value_content;
+                }
+            } catch (...) {
+                if (node.is_partial && is_potential_container) {
+                    // Partial container: pass through the already-normalized content
+                    value_to_add = value_content;
+                } else {
+                    // Not valid JSON - treat as string value
+                    if (!closing_quote_pending) {
+                        value_to_add          = "\"";
+                        closing_quote_pending = true;
+                    }
+                    value_to_add += escape_json_string_inner(value_content);
+                }
+            }
+        }
+
+        args_target() += value_to_add;
+    }
+
+    if (is_arg_close && current_tool) {
+        if (closing_quote_pending) {
+            args_target() += "\"";
+            closing_quote_pending = false;
+        }
+    }
+
+    if (is_tool_close && current_tool) {
+        // Flush buffer to arguments if tool name was never seen
+        if (current_tool->name.empty() && !args_buffer.empty()) {
+            current_tool->arguments = args_buffer;
+            args_buffer.clear();
+        }
+        // Close any pending string quote
+        if (closing_quote_pending) {
+            current_tool->arguments += "\"";
+            closing_quote_pending = false;
+        }
+        // Close any unclosed braces (accounts for nested objects)
+        for (int d = json_brace_depth(current_tool->arguments); d > 0; d--) {
+            current_tool->arguments += "}";
+        }
+        // Add tool call to results if named; otherwise discard
+        if (pending_tool_call.has_value()) {
+            if (!current_tool->name.empty()) {
+                result.tool_calls.push_back(pending_tool_call.value());
+            }
+            pending_tool_call.reset();
+        }
     }
 }
 
-void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
-    common_chat_peg_mapper::map(node);
+common_peg_parser common_chat_peg_builder::standard_constructed_tools(
+    const std::map<std::string, std::string> & markers,
+    const ordered_json &                       tools,
+    bool                                       parallel_tool_calls,
+    bool                                       force_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
+    }
 
-    bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
-    bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
-    bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
-    bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
-    bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
-    bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
-    bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
-    bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
+    // Extract markers with defaults
+    auto get_marker = [&markers](const std::string & key, const std::string & default_val = "") -> std::string {
+        auto it = markers.find(key);
+        return it != markers.end() ? it->second : default_val;
+    };
 
-    if (is_tool_open) {
-        result.tool_calls.emplace_back();
-        current_tool = &result.tool_calls.back();
-        arg_count = 0;
+    std::string section_start    = get_marker("tool_call_start_marker", "<tool_call>");
+    std::string section_end      = get_marker("tool_call_end_marker", "</tool_call>");
+    std::string func_opener      = get_marker("function_opener", "<function=");
+    std::string func_name_suffix = get_marker("function_name_suffix", ">");
+    std::string func_closer      = get_marker("function_closer", "</function>");
+    std::string param_key_prefix = get_marker("parameter_key_prefix", "<param=");
+    std::string param_key_suffix = get_marker("parameter_key_suffix", ">");
+    std::string param_closer     = get_marker("parameter_closer", "</param>");
+
+    // Build tool choices for tagged format
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        ordered_json   params   = function.contains("parameters") ? function.at("parameters") : ordered_json::object();
+
+        // Build argument parsers
+        auto args = eps();
+        if (params.contains("properties") && !params["properties"].empty()) {
+            auto arg_choice = choice();
+            for (const auto & el : params["properties"].items()) {
+                const std::string & prop_name = el.key();
+
+                auto arg_name_parser =
+                    choice({ literal(prop_name), literal("\"" + prop_name + "\""), literal("'" + prop_name + "'") });
+
+                auto arg_rule = tool_arg(tool_arg_open(literal(param_key_prefix)) + tool_arg_name(arg_name_parser) +
+                                         literal(param_key_suffix) + tool_arg_value(until(param_closer)) +
+                                         tool_arg_close(literal(param_closer)));
+                arg_choice |= arg_rule;
+            }
+            args = zero_or_more(arg_choice + space());
+        }
+
+        // Build function parser: <function=name>args</function>
+        auto tool_parser = tool(tool_open(literal(func_opener) + tool_name(literal(name)) + literal(func_name_suffix)) +
+                                space() + tool_args(args) + space() + tool_close(literal(func_closer)));
+
+        tool_choices |= rule("tool-" + name, tool_parser);
     }
 
-    if (is_tool_name) {
-        current_tool->name = std::string(node.text);
-        current_tool->arguments = "{";
+    // Build the section with markers
+    auto section =
+        parallel_tool_calls ?
+            trigger_rule("tool-call", literal(section_start) + space() + one_or_more(tool_choices + space()) +
+                                          literal(section_end)) :
+            trigger_rule("tool-call", literal(section_start) + space() + tool_choices + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+// Python-style tool calls: name(arg1="value1", arg2=123)
+// Used only by LFM2 for now, so we don't merge it into autoparser
+common_peg_parser common_chat_peg_builder::python_style_tool_calls(
+    const ordered_json & tools,
+    bool                 parallel_tool_calls) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
     }
 
-    if (is_arg_open) {
-        needs_closing_quote = false;
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        ordered_json   params   = function.contains("parameters") ? function.at("parameters") : ordered_json::object();
+
+        auto args = eps();
+        if (params.contains("properties") && !params["properties"].empty()) {
+            auto arg_choice = choice();
+            for (const auto & el : params["properties"].items()) {
+                const std::string & prop_name = el.key();
+                const auto & prop_def = el.value();
+                bool is_string_type = (prop_def.contains("type") && prop_def["type"] == "string");
+
+                auto arg_name_parser = literal(prop_name);
+
+                common_peg_parser arg_value_parser = eps();
+                auto string_value_parser = choice({
+                    literal("\"") + tool_arg_string_value(string_content('"')) + literal("\""),
+                    literal("'") + tool_arg_string_value(string_content('\'')) + literal("'")
+                });
+
+                if (is_string_type) {
+                    arg_value_parser = string_value_parser;
+                } else {
+                    arg_value_parser = tool_arg_value(python_value());
+                }
+
+                // Full argument: name="value" or name=value
+                auto arg_rule = tool_arg(
+                    tool_arg_open(eps()) +
+                    tool_arg_name(arg_name_parser) +
+                    literal("=") +
+                    arg_value_parser +
+                    tool_arg_close(eps())
+                );
+                arg_choice |= arg_rule;
+            }
+
+            args = arg_choice + zero_or_more("," + space() + arg_choice);
+        }
+
+        auto tool_parser = tool(tool_open(tool_name(literal(name)) + literal("(")) +
+            space() + tool_args(args) + space() + tool_close(literal(")"))
+        );
+
+        tool_choices |= rule("tool-" + name, tool_parser);
     }
 
-    if (is_arg_name && current_tool) {
-        if (arg_count > 0) {
-            current_tool->arguments += ",";
+    if (parallel_tool_calls) {
+        return "[" + space() + tool_choices + zero_or_more("," + space() + tool_choices) + space() + "]";
+    }
+    return "[" + space() + tool_choices + space() + "]";
+}
+
+// Helper: Parse dot notation key into prefix and field name
+static std::pair<std::string, std::string> parse_key_spec(const std::string & key) {
+    auto dot_pos = key.find('.');
+    if (dot_pos == std::string::npos) {
+        return {"", key};  // Top-level field
+    }
+    return {key.substr(0, dot_pos), key.substr(dot_pos + 1)};
+}
+
+// Mode 1: function_is_key — parse {"function_name": {...}}
+common_peg_parser common_chat_peg_builder::build_json_tools_function_is_key(
+    const ordered_json & tools,
+    const std::string &  args_key,
+    const std::string &  effective_args_key,
+    const std::string &  call_id_key,
+    const std::string &  gen_call_id_key) {
+
+    auto tool_choices = choice();
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
         }
-        current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
-        ++arg_count;
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        ordered_json   params   = function.contains("parameters") ? function.at("parameters") : ordered_json::object();
+
+        // Build inner object fields
+        std::vector<common_peg_parser> inner_fields;
+
+        if (!call_id_key.empty()) {
+            auto id_parser = atomic(
+                literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                literal("\"") + tool_id(string_content('"')) + literal("\"")
+            );
+            inner_fields.push_back(optional(id_parser + space() + optional(literal(",") + space())));
+        }
+
+        if (!gen_call_id_key.empty()) {
+            auto gen_id_parser = atomic(
+                literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(string_content('"')) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+            inner_fields.push_back(optional(gen_id_parser + space() + optional(literal(",") + space())));
+        }
+
+        // Arguments — either wrapped in args_key or parsed directly
+        common_peg_parser args_parser = eps();
+        if (args_key.empty()) {
+            args_parser = tool_args(schema(json(), "tool-" + name + "-schema", params));
+        } else {
+            args_parser = literal("\"" + effective_args_key + "\"") + space() + literal(":") + space() +
+                          tool_args(schema(json(), "tool-" + name + "-schema", params));
+        }
+        inner_fields.push_back(args_parser);
+
+        // Build inner object parser
+        common_peg_parser inner_object = eps();
+        if (args_key.empty() && inner_fields.size() == 1) {
+            inner_object = inner_fields[0];
+        } else {
+            inner_object = literal("{") + space();
+            for (size_t i = 0; i < inner_fields.size(); i++) {
+                inner_object = inner_object + inner_fields[i];
+                if (i < inner_fields.size() - 1) {
+                    inner_object = inner_object + space();
+                }
+            }
+            inner_object = inner_object + space() + literal("}");
+        }
+
+        auto tool_parser = tool(
+            tool_open(literal("{")) + space() +
+            literal("\"") + tool_name(literal(name)) + literal("\"") +
+            space() + literal(":") + space() +
+            inner_object +
+            space() + tool_close(literal("}"))
+        );
+
+        tool_choices |= rule("tool-" + name, tool_parser);
     }
 
-    if (is_arg_string && current_tool) {
-        // Serialize to JSON, but exclude the end quote
-        std::string dumped = json(trim_trailing_space(node.text)).dump();
-        current_tool->arguments += dumped.substr(0, dumped.size() - 1);
-        needs_closing_quote = true;
+    return tool_choices;
+}
+
+// Mode 2: Nested keys (dot notation like "function.name")
+common_peg_parser common_chat_peg_builder::build_json_tools_nested_keys(
+    const ordered_json & tools,
+    const std::string &  effective_name_key,
+    const std::string &  effective_args_key,
+    const std::string &  call_id_key,
+    const std::string &  gen_call_id_key) {
+
+    auto tool_choices = choice();
+
+    auto name_spec = parse_key_spec(effective_name_key);
+    auto args_spec = parse_key_spec(effective_args_key);
+
+    std::string nested_prefix     = !name_spec.first.empty() ? name_spec.first  : args_spec.first;
+    std::string nested_name_field = !name_spec.first.empty() ? name_spec.second  : effective_name_key;
+    std::string nested_args_field = !args_spec.first.empty() ? args_spec.second  : effective_args_key;
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
+        }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        ordered_json   params   = function.contains("parameters") ? function.at("parameters") : ordered_json::object();
+
+        auto nested_name = literal("\"" + nested_name_field + "\"") + space() + literal(":") + space() +
+                          literal("\"") + tool_name(literal(name)) + literal("\"");
+        auto nested_args = literal("\"" + nested_args_field + "\"") + space() + literal(":") + space() +
+                          tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+        auto nested_object = literal("{") + space() +
+                            nested_name + space() + literal(",") + space() +
+                            nested_args +
+                            space() + literal("}");
+
+        // Format: { id?, "function": {...} }
+        auto tool_parser_body = tool_open(literal("{")) + space();
+
+        if (!call_id_key.empty()) {
+            auto id_spec = parse_key_spec(call_id_key);
+            if (id_spec.first.empty()) {
+                auto id_parser = atomic(
+                    literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                    literal("\"") + tool_id(string_content('"')) + literal("\"")
+                );
+                tool_parser_body = tool_parser_body + optional(id_parser + space() + literal(",") + space());
+            }
+        }
+
+        if (!gen_call_id_key.empty()) {
+            auto gen_id_spec = parse_key_spec(gen_call_id_key);
+            if (gen_id_spec.first.empty()) {
+                auto gen_id_parser = atomic(
+                    literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                    choice({
+                        literal("\"") + tool_id(string_content('"')) + literal("\""),
+                        tool_id(json_number())
+                    })
+                );
+                tool_parser_body = tool_parser_body + optional(gen_id_parser + space() + literal(",") + space());
+            }
+        }
+
+        auto nested_field = literal("\"" + nested_prefix + "\"") + space() + literal(":") + space() + nested_object;
+        tool_parser_body = tool_parser_body + nested_field + space() + tool_close(literal("}"));
+
+        tool_choices |= rule("tool-" + name, tool(tool_parser_body));
     }
 
-    if (is_arg_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+    return tool_choices;
+}
+
+// Mode 3: Flat keys with optional ID fields and parameter ordering
+common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
+    const ordered_json &             tools,
+    const std::string &              effective_name_key,
+    const std::string &              effective_args_key,
+    const std::string &              call_id_key,
+    const std::string &              gen_call_id_key,
+    const std::vector<std::string> & parameters_order) {
+
+    auto tool_choices    = choice();
+    auto name_key_parser = literal("\"" + effective_name_key + "\"");
+    auto args_key_parser = literal("\"" + effective_args_key + "\"");
+
+    for (const auto & tool_def : tools) {
+        if (!tool_def.contains("function")) {
+            continue;
         }
+        const auto &   function = tool_def.at("function");
+        std::string    name     = function.at("name");
+        ordered_json   params   = function.contains("parameters") ? function.at("parameters") : ordered_json::object();
+
+        auto tool_name_ = name_key_parser + space() + literal(":") + space() +
+                         literal("\"") + tool_name(literal(name)) + literal("\"");
+        auto tool_args_ = args_key_parser + space() + literal(":") + space() +
+                         tool_args(schema(json(), "tool-" + name + "-schema", params));
+
+        // Build ID parsers if keys are provided
+        common_peg_parser id_parser = eps();
+        if (!call_id_key.empty()) {
+            id_parser = atomic(
+                literal("\"" + call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(string_content('"')) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+        }
+
+        common_peg_parser gen_id_parser = eps();
+        if (!gen_call_id_key.empty()) {
+            gen_id_parser = atomic(
+                literal("\"" + gen_call_id_key + "\"") + space() + literal(":") + space() +
+                choice({
+                    literal("\"") + tool_id(string_content('"')) + literal("\""),
+                    tool_id(json_number())
+                })
+            );
+        }
+
+        // Create (parser, key) pairs for all fields, then sort by parameters_order
+        std::vector<std::pair<common_peg_parser, std::string>> parser_pairs;
+        parser_pairs.emplace_back(tool_name_, effective_name_key);
+        parser_pairs.emplace_back(tool_args_, effective_args_key);
+        if (!call_id_key.empty()) {
+            parser_pairs.emplace_back(optional(id_parser), call_id_key);
+        }
+        if (!gen_call_id_key.empty()) {
+            parser_pairs.emplace_back(optional(gen_id_parser), gen_call_id_key);
+        }
+
+        std::sort(parser_pairs.begin(), parser_pairs.end(),
+            [&parameters_order](const auto & a, const auto & b) {
+                auto pos_a = std::find(parameters_order.begin(), parameters_order.end(), a.second);
+                auto pos_b = std::find(parameters_order.begin(), parameters_order.end(), b.second);
+                size_t idx_a = (pos_a == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_a);
+                size_t idx_b = (pos_b == parameters_order.end()) ? parameters_order.size() : std::distance(parameters_order.begin(), pos_b);
+                return idx_a < idx_b;
+            });
+
+        auto ordered_body = tool_open(literal("{")) + space();
+        for (size_t i = 0; i < parser_pairs.size(); i++) {
+            ordered_body = ordered_body + parser_pairs[i].first;
+            if (i < parser_pairs.size() - 1) {
+                ordered_body = ordered_body + space() + literal(",") + space();
+            }
+        }
+        ordered_body = ordered_body + space() + tool_close(literal("}"));
+
+        tool_choices |= rule("tool-" + name, tool(ordered_body));
     }
 
-    if (is_arg_json && current_tool) {
-        current_tool->arguments += std::string(trim_trailing_space(node.text));
+    return tool_choices;
+}
+
+common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const std::string & delimiter) {
+    if (s.empty()) {
+        return eps();
     }
+    if (delimiter.empty()) {
+        return literal(s);
+    }
+    return literal(s.substr(0, s.rfind(delimiter)));
+}
 
-    if (is_tool_close && current_tool) {
-        if (needs_closing_quote) {
-            current_tool->arguments += "\"";
-            needs_closing_quote = false;
+common_peg_parser common_chat_peg_builder::standard_json_tools(
+                                                       const std::string &              section_start,
+                                                       const std::string &              section_end,
+                                                       const ordered_json &             tools,
+                                                       bool                             parallel_tool_calls,
+                                                       bool                             force_tool_calls,
+                                                       const std::string &              name_key,
+                                                       const std::string &              args_key,
+                                                       bool                             array_wrapped,
+                                                       bool                             function_is_key,
+                                                       const std::string &              call_id_key,
+                                                       const std::string &              gen_call_id_key,
+                                                       const std::vector<std::string> & parameters_order) {
+    if (!tools.is_array() || tools.empty()) {
+        return eps();
+    }
+
+    std::string effective_name_key = name_key.empty() ? "name" : name_key;
+    std::string effective_args_key = args_key.empty() ? "arguments" : args_key;
+
+    // Dispatch to the appropriate builder based on the JSON layout mode
+    common_peg_parser tool_choices = eps();
+    if (function_is_key) {
+        tool_choices = build_json_tools_function_is_key(tools, args_key, effective_args_key, call_id_key, gen_call_id_key);
+    } else {
+        auto name_spec = parse_key_spec(effective_name_key);
+        auto args_spec = parse_key_spec(effective_args_key);
+        if (!name_spec.first.empty() || !args_spec.first.empty()) {
+            tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
+        } else {
+            tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
         }
-        current_tool->arguments += "}";
+    }
+
+    // Build the section with markers
+    auto tool_calls = tool_choices;
+    if (parallel_tool_calls) {
+        tool_calls = tool_calls + zero_or_more(space() + literal(",") + space() + tool_choices);
+    }
+
+    if (array_wrapped) {
+        tool_calls = literal("[") + space() + tool_calls + space() + literal("]");
+    }
+
+    auto section =
+        trigger_rule("tool-call", literal(section_start) + space() + tool_calls + space() + literal(section_end));
+
+    return force_tool_calls ? section : optional(section);
+}
+
+void common_chat_peg_gemma4_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
+    for (const auto & node : result.nodes) {
+        visit(arena, node);
+    }
+}
+
+static std::string gemma4_to_json(const common_peg_ast_arena & arena, common_peg_ast_id id) {
+    const auto & node = arena.get(id);
+
+    if (node.text.empty()) {
+        return "";
+    }
+
+    if (node.rule == "gemma4-number" || node.rule == "gemma4-bool" || node.rule == "gemma4-null") {
+        return std::string(node.text);
+    }
+
+    if (node.rule == "gemma4-string-content") {
+        return escape_json_string_inner(std::string(node.text));
+    }
+
+    if (node.rule == "gemma4-string") {
+        std::string result = "\"";
+        if (!node.children.empty()) {
+            result += gemma4_to_json(arena, node.children[0]);
+            if (!node.is_partial) {
+                result += "\"";
+            }
+        }
+        return result;
+    }
+
+    if (node.rule == "gemma4-array") {
+        std::string result = "[";
+
+        bool add_comma = false;
+        for (auto child_id : node.children) {
+            if (add_comma) {
+                result += ',';
+            }
+            add_comma = true;
+            result += gemma4_to_json(arena, child_id);
+        }
+
+        if (!node.is_partial) {
+            result += ']';
+        }
+        return result;
+    }
+
+    if (node.rule == "gemma4-dict-key-name") {
+        return std::string(node.text);
+    }
+
+    if (node.rule == "gemma4-dict-key") {
+        std::string result = "\"";
+        if (!node.children.empty()) {
+            result += escape_json_string_inner(gemma4_to_json(arena, node.children[0]));
+        }
+        if (!node.is_partial) {
+            result += "\":";
+        }
+        return result;
+    }
+
+    if (node.rule == "gemma4-dict-kv") {
+        std::string result;
+        for (auto child_id : node.children) {
+            result += gemma4_to_json(arena, child_id);
+        }
+        return result;
+    }
+
+    if (node.rule == "gemma4-dict") {
+        std::string result = "{";
+
+        bool add_comma = false;
+        for (auto child_id : node.children) {
+            if (add_comma) {
+                result += ',';
+            }
+            add_comma = true;
+            result += gemma4_to_json(arena, child_id);
+        }
+
+        if (!node.is_partial) {
+            result += '}';
+        }
+        return result;
+    }
+
+    if (node.rule == "gemma4-value") {
+        if (!node.children.empty()) {
+            return gemma4_to_json(arena, node.children[0]);
+        }
+        return "";
+    }
+
+    return "";
+}
+
+void common_chat_peg_gemma4_mapper::visit(const common_peg_ast_arena & arena, common_peg_ast_id id) {
+    const auto & node = arena.get(id);
+
+    if (node.tag == "reasoning") {
+        result.reasoning_content += std::string(node.text);
+        return;
+    }
+
+    if (node.tag == "content") {
+        result.content += std::string(node.text);
+        return;
+    }
+
+    if (node.tag == "tool") {
+        auto name_id = arena.find_by_tag(node, "tool-name");
+        auto args_id = arena.find_by_tag(node, "tool-args");
+
+        if (name_id != COMMON_PEG_INVALID_AST_ID && args_id != COMMON_PEG_INVALID_AST_ID) {
+            const auto & name_node = arena.get(name_id);
+            const auto & args_node = arena.get(args_id);
+
+            if (!name_node.is_partial) {
+                common_chat_tool_call call;
+                call.name = std::string(name_node.text);
+                if (!args_node.children.empty()) {
+                    call.arguments = gemma4_to_json(arena, args_node.children[0]);
+                }
+                result.tool_calls.push_back(call);
+            }
+        }
+
+        return;
+    }
+
+    for (auto child_id : node.children) {
+        visit(arena, child_id);
     }
 }
diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
index b84cbed206..1ea3eb7eb8 100644
--- a/common/chat-peg-parser.h
+++ b/common/chat-peg-parser.h
@@ -3,22 +3,9 @@
 #include "chat.h"
 #include "peg-parser.h"
 
-class common_chat_peg_builder : public common_peg_parser_builder {
-  public:
-    static constexpr const char * REASONING_BLOCK = "reasoning-block";
-    static constexpr const char * REASONING = "reasoning";
-    static constexpr const char * CONTENT = "content";
-
-    common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
-    common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
-    common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
-};
-
-inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
-    common_chat_peg_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+#include <map>
+#include <optional>
+#include <vector>
 
 class common_chat_peg_mapper {
   public:
@@ -26,80 +13,183 @@ class common_chat_peg_mapper {
 
     common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
 
+    virtual ~common_chat_peg_mapper() = default;
+
     virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
     virtual void map(const common_peg_ast_node & node);
+  protected:
+    virtual std::string normalize_container_value(const std::string & input);
+  private:
+      // Tool call handling state
+      std::optional<common_chat_tool_call> pending_tool_call;  // Tool call waiting for name
+      common_chat_tool_call *              current_tool          = nullptr;
+      int                                  arg_count             = 0;
+      bool                                 closing_quote_pending = false;
+      std::string                          args_buffer;  // Buffer to delay arguments until tool name is known
+
+      // Returns a reference to the active argument destination string.
+      // Before tool_name is known, writes go to args_buffer; after, to current_tool->arguments.
+      std::string & args_target();
 };
 
-class common_chat_peg_native_builder : public common_chat_peg_builder {
+class common_chat_peg_gemma4_mapper : public common_chat_peg_mapper {
   public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_ID = "tool-id";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARGS = "tool-args";
-
-    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
-    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
-    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
-    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
-    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
-    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
+    common_chat_peg_gemma4_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
+  private:
+    void visit(const common_peg_ast_arena & arena, common_peg_ast_id id);
 };
 
-class common_chat_peg_native_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
+struct content_structure;
+struct tool_call_structure;
 
+class common_chat_peg_builder : public common_peg_parser_builder {
   public:
-    common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    // Tag constants (from former common_chat_peg_base_builder)
+    static constexpr const char * REASONING_BLOCK = "reasoning-block";
+    static constexpr const char * REASONING       = "reasoning";
+    static constexpr const char * CONTENT         = "content";
+
+    // Tag constants
+    static constexpr const char * TOOL           = "tool";
+    static constexpr const char * TOOL_OPEN      = "tool-open";
+    static constexpr const char * TOOL_CLOSE     = "tool-close";
+    static constexpr const char * TOOL_ID        = "tool-id";
+    static constexpr const char * TOOL_NAME      = "tool-name";
+    static constexpr const char * TOOL_ARGS      = "tool-args";
+    static constexpr const char * TOOL_ARG       = "tool-arg";
+    static constexpr const char * TOOL_ARG_OPEN  = "tool-arg-open";
+    static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
+    static constexpr const char * TOOL_ARG_NAME         = "tool-arg-name";
+    static constexpr const char * TOOL_ARG_VALUE        = "tool-arg-value";
+    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";  // For schema-declared string types
 
-    void map(const common_peg_ast_node & node) override;
-};
+    // Low-level tag methods (from former common_chat_peg_base_builder)
+    common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
 
-inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
-    common_chat_peg_native_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+    common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
 
-class common_chat_peg_constructed_builder : public common_chat_peg_builder {
-  public:
-    static constexpr const char * TOOL = "tool";
-    static constexpr const char * TOOL_OPEN = "tool-open";
-    static constexpr const char * TOOL_CLOSE = "tool-close";
-    static constexpr const char * TOOL_NAME = "tool-name";
-    static constexpr const char * TOOL_ARG = "tool-arg";
-    static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
-    static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
-    static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
-    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
-    static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";
+    common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
+
+    common_peg_parser tag_with_safe_content(const std::string &       tag_name,
+                        const std::string &       marker,
+                        const common_peg_parser & p);
 
+    // Low-level tag methods
     common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
     common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
     common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
+    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
     common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
+    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
     common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
     common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
     common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
     common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
+    common_peg_parser tool_arg_value(const common_peg_parser & p) { return tag(TOOL_ARG_VALUE, p); }
+
+    // Use for schema-declared string types - won't be treated as potential JSON container
     common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
-    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
+    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_VALUE, p)); }
+
+
+    // Return a parser that parses the prefix of a string, up to a given delimiter.
+    common_peg_parser prefix(const std::string & s, const std::string & delimiter = {});
+
+    // Legacy-compatible helper for building standard JSON tool calls
+    // Used by tests and manual parsers
+    // name_key/args_key: JSON key names for function name and arguments
+    //   Empty or "name"/"arguments" will accept both common variations
+    //   Supports dot notation for nested objects (e.g., "function.name")
+    // array_wrapped: if true, tool calls are wrapped in JSON array [...]
+    // function_is_key: if true, function name is the JSON key (e.g., {"func_name": {...}})
+    // call_id_key: JSON key for string call ID (e.g., "id")
+    // gen_call_id_key: JSON key for generated integer call ID (e.g., "tool_call_id")
+    // parameters_order: order in which JSON fields should be parsed
+    common_peg_parser standard_json_tools(const std::string &              section_start,
+                                          const std::string &              section_end,
+                                          const nlohmann::ordered_json &   tools,
+                                          bool                             parallel_tool_calls,
+                                          bool                             force_tool_calls,
+                                          const std::string &              name_key = "",
+                                          const std::string &              args_key = "",
+                                          bool                             array_wrapped = false,
+                                          bool                             function_is_key = false,
+                                          const std::string &              call_id_key = "",
+                                          const std::string &              gen_call_id_key = "",
+                                          const std::vector<std::string> & parameters_order = {});
+
+    // Legacy-compatible helper for building XML/tagged style tool calls
+    // Used by tests and manual parsers
+    common_peg_parser standard_constructed_tools(const std::map<std::string, std::string> & markers,
+                                                 const nlohmann::ordered_json &             tools,
+                                                 bool                                       parallel_tool_calls,
+                                                 bool                                       force_tool_calls);
+
+    // Helper for Python-style function call format: name(arg1="value1", arg2=123)
+    // Used by LFM2 and similar templates
+    common_peg_parser python_style_tool_calls(const nlohmann::ordered_json & tools,
+                                              bool                           parallel_tool_calls);
+
+  private:
+    // Implementation helpers for standard_json_tools — one per JSON tool call layout mode
+    common_peg_parser build_json_tools_function_is_key(const nlohmann::ordered_json & tools,
+                                                       const std::string &            args_key,
+                                                       const std::string &            effective_args_key,
+                                                       const std::string &            call_id_key,
+                                                       const std::string &            gen_call_id_key);
+
+    common_peg_parser build_json_tools_nested_keys(const nlohmann::ordered_json & tools,
+                                                   const std::string &            effective_name_key,
+                                                   const std::string &            effective_args_key,
+                                                   const std::string &            call_id_key,
+                                                   const std::string &            gen_call_id_key);
+
+    common_peg_parser build_json_tools_flat_keys(const nlohmann::ordered_json &   tools,
+                                                 const std::string &              effective_name_key,
+                                                 const std::string &              effective_args_key,
+                                                 const std::string &              call_id_key,
+                                                 const std::string &              gen_call_id_key,
+                                                 const std::vector<std::string> & parameters_order);
 };
 
-class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
-    common_chat_tool_call * current_tool;
-    int arg_count = 0;
-    bool needs_closing_quote = false;
+inline common_peg_arena build_chat_peg_parser(
+  const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
+  common_chat_peg_builder builder;
+  builder.set_root(fn(builder));
+  return builder.build();
+}
 
+class tag_based_peg_mapper {
   public:
-    common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
+    std::map<std::string, std::string> tags;
 
-    void map(const common_peg_ast_node & node) override;
+    void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
 };
 
-inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
-    common_chat_peg_constructed_builder builder;
-    builder.set_root(fn(builder));
-    return builder.build();
-}
+struct tagged_parse_result {
+    common_peg_parse_result              result;
+    std::map<std::string, std::string> tags;
+};
+
+struct tagged_peg_parser {
+    common_peg_arena arena;
+    common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_NONE;
+
+    tagged_peg_parser & withDebug() {
+      flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
+      return *this;
+    }
+
+    tagged_peg_parser & withoutDebug() {
+      flags = flags & ~COMMON_PEG_PARSE_FLAG_DEBUG;
+      return *this;
+    }
+
+    tagged_parse_result parse_and_extract(const std::string & input, common_peg_parse_flags extra_flags = COMMON_PEG_PARSE_FLAG_NONE) const;
+    tagged_parse_result parse_anywhere_and_extract(const std::string & input) const;
+};
+
+tagged_peg_parser build_tagged_peg_parser(
+    const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
+
diff --git a/common/chat.cpp b/common/chat.cpp
index e4743cf028..aea1bd850a 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1,40 +1,57 @@
 #include "chat.h"
-#include "chat-parser.h"
+
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
 #include "chat-peg-parser.h"
 #include "common.h"
-#include "json-partial.h"
+#include "ggml.h"
 #include "json-schema-to-grammar.h"
 #include "log.h"
-#include "regex-partial.h"
 
-#include "jinja/parser.h"
 #include "jinja/value.h"
 #include "jinja/runtime.h"
 #include "jinja/caps.h"
+#include "peg-parser.h"
+
+#include "nlohmann/json.hpp"
 
-#include <algorithm>
 #include <cstdio>
-#include <cctype>
+#include <cstdlib>
+#include <ctime>
 #include <exception>
 #include <functional>
-#include <iostream>
+
 #include <optional>
+#include <sstream>
 #include <stdexcept>
 #include <string>
+#include <utility>
 #include <vector>
 #include <fstream>
 
 using json = nlohmann::ordered_json;
 
 static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
-    auto time = std::chrono::system_clock::to_time_t(now);
-    auto local_time = *std::localtime(&time);
+    auto               time       = std::chrono::system_clock::to_time_t(now);
+    auto               local_time = *std::localtime(&time);
     std::ostringstream ss;
     ss << std::put_time(&local_time, format.c_str());
     auto res = ss.str();
     return res;
 }
 
+static json safe_args_parse(const std::string & to_parse) {
+    std::string stripped = to_parse;
+    if (to_parse.at(0) == '"' && to_parse.at(to_parse.length() - 1) == '"') {
+        stripped = to_parse.substr(1, to_parse.length() - 1);
+    }
+    try {
+        return json::parse(stripped);
+    } catch (json::exception & e) {
+        return stripped;
+    }
+}
+
 static std::string string_diff(const std::string & last, const std::string & current) {
     if (last.empty()) {
         return current;
@@ -117,7 +134,7 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
                 {"type", "function"},
                 {"function", {
                     {"name", tool_call.name},
-                    {"arguments", tool_call.arguments},
+                    {"arguments", json(tool_call.arguments)},
                 }},
             };
             if (!tool_call.id.empty()) {
@@ -134,7 +151,8 @@ json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
     return jmsg;
 }
 
-std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
+std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv,
+                                                                      const common_chat_msg & msg_new) {
     std::vector<common_chat_msg_diff> diffs;
     if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
         diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
@@ -144,38 +162,56 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
     // TODO: these can become expensive for long messages - how to optimize?
     if (msg_prv.reasoning_content != msg_new.reasoning_content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff                  = diffs.emplace_back();
         diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
     }
     if (msg_prv.content != msg_new.content) {
-        auto & diff = diffs.emplace_back();
+        auto & diff        = diffs.emplace_back();
         diff.content_delta = string_diff(msg_prv.content, msg_new.content);
     }
 
     if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
-        throw std::runtime_error("Invalid diff: now finding less tool calls!");
+        std::string err = "Invalid diff: now finding less tool calls!\n";
+        err += "  Previous (" + std::to_string(msg_prv.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_prv.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current (" + std::to_string(msg_new.tool_calls.size()) + "):\n";
+        for (const auto & tc : msg_new.tool_calls) {
+            err += "    - name: '" + tc.name + "', args: '" + tc.arguments + "'\n";
+        }
+        err += "  Current msg text content:\n" + msg_new.content + "\n";
+        throw std::runtime_error(err);
     }
 
     if (!msg_prv.tool_calls.empty()) {
-        const auto idx = msg_prv.tool_calls.size() - 1;
+        const auto   idx  = msg_prv.tool_calls.size() - 1;
         const auto & pref = msg_prv.tool_calls[idx];
         const auto & newf = msg_new.tool_calls[idx];
-        if (pref.name != newf.name) {
-            throw std::runtime_error("Invalid diff: tool call mismatch!");
+        // Allow tool name to change during incremental parsing:
+        // - empty -> non-empty (initial discovery)
+        // - prefix -> longer string (name grows as more input is parsed)
+        if (pref.name != newf.name && !pref.name.empty() && !newf.name.empty()) {
+            // Check if one is a prefix of the other (for incremental parsing where names grow or shrink)
+            bool is_prefix = (newf.name.rfind(pref.name, 0) == 0);
+            if (!is_prefix) {
+                LOG_ERR("Tool call mismatch: prev='%s' new='%s'\n", pref.name.c_str(), newf.name.c_str());
+                throw std::runtime_error("Invalid diff: tool call mismatch!");
+            }
         }
         const auto args_diff = string_diff(pref.arguments, newf.arguments);
-        if (!args_diff.empty() || pref.id != newf.id) {
-            auto & diff = diffs.emplace_back();
+        if (!args_diff.empty() || pref.id != newf.id || pref.name != newf.name) {
+            auto & diff          = diffs.emplace_back();
             diff.tool_call_index = idx;
-            if (pref.id != newf.id) {
-                diff.tool_call_delta.id = newf.id;
+            if (pref.id != newf.id || pref.name != newf.name) {
+                diff.tool_call_delta.id   = newf.id;
                 diff.tool_call_delta.name = newf.name;
             }
             diff.tool_call_delta.arguments = args_diff;
         }
     }
     for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
-        auto & diff = diffs.emplace_back();
+        auto & diff          = diffs.emplace_back();
         diff.tool_call_index = idx;
         diff.tool_call_delta = msg_new.tool_calls[idx];
     }
@@ -185,94 +221,14 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
 
 using chat_template_caps = jinja::caps;
 
-struct common_chat_template {
-    jinja::program prog;
-    std::string bos_tok;
-    std::string eos_tok;
-    std::string src;
-    chat_template_caps caps;
-
-    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
-        jinja::lexer lexer;
-        auto lexer_res = lexer.tokenize(src);
-        this->prog = jinja::parse_from_tokens(lexer_res);
-
-        this->src = lexer_res.source;
-        this->bos_tok = bos_token;
-        this->eos_tok = eos_token;
-
-        this->caps = jinja::caps_get(prog);
-        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
-    }
-
-    const std::string & source() const { return src; }
-    const std::string & bos_token() const { return bos_tok; }
-    const std::string & eos_token() const { return eos_tok; }
-
-    // TODO: this is ugly, refactor it somehow
-    json add_system(const json & messages, const std::string & system_prompt) const {
-        GGML_ASSERT(messages.is_array());
-        auto msgs_copy = messages;
-        if (!caps.supports_system_role) {
-            if (msgs_copy.empty()) {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "user"},
-                    {"content", system_prompt}
-                });
-            } else {
-                auto & first_msg = msgs_copy[0];
-                if (!first_msg.contains("content")) {
-                    first_msg["content"] = "";
-                }
-                first_msg["content"] = system_prompt + "\n\n"
-                    + first_msg["content"].get<std::string>();
-            }
-        } else {
-            if (msgs_copy.empty() || msgs_copy[0].at("role") != "system") {
-                msgs_copy.insert(msgs_copy.begin(), json{
-                    {"role", "system"},
-                    {"content", system_prompt}
-                });
-            } else if (msgs_copy[0].at("role") == "system") {
-                msgs_copy[0]["content"] = system_prompt;
-            }
-        }
-        return msgs_copy;
-    }
-
-    chat_template_caps original_caps() const {
-        return caps;
-    }
-
-};
-
 struct common_chat_templates {
     bool add_bos;
     bool add_eos;
-    bool has_explicit_template; // Model had builtin template or template overridde was specified.
-    std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
+    bool has_explicit_template;  // Model had builtin template or template overridde was specified.
+    std::unique_ptr<common_chat_template> template_default;  // always set (defaults to chatml)
     std::unique_ptr<common_chat_template> template_tool_use;
 };
 
-struct templates_params {
-    json messages;
-    json tools;
-    common_chat_tool_choice tool_choice;
-    json json_schema;
-    bool parallel_tool_calls;
-    common_reasoning_format reasoning_format;
-    bool stream;
-    std::string grammar;
-    bool add_generation_prompt = true;
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    json extra_context;
-    bool add_bos;
-    bool add_eos;
-    bool is_inference = true;
-    bool mark_input = true; // whether to mark input strings in the jinja context
-};
-
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
     if (tool_choice == "auto") {
         return COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -287,23 +243,24 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
 }
 
 bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
-    common_chat_templates_inputs dummy_inputs;
+    common_chat_templates_inputs inputs;
+    inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
     common_chat_msg msg;
-    msg.role = "user";
+    msg.role    = "user";
     msg.content = "test";
-    dummy_inputs.messages = {msg};
-    dummy_inputs.enable_thinking = false;
-    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    dummy_inputs.enable_thinking = true;
-    const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+    inputs.messages = { msg };
+    inputs.enable_thinking = true;
+    inputs.add_generation_prompt = true;
+    inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+
+    auto params = common_chat_templates_apply(chat_templates, inputs);
+    return params.supports_thinking;
 }
 
 std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messages) {
     std::vector<common_chat_msg> msgs;
 
     try {
-
         if (!messages.is_array()) {
             throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
         }
@@ -319,7 +276,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             }
             msg.role = message.at("role");
 
-            auto has_content = message.contains("content");
+            auto has_content    = message.contains("content");
             auto has_tool_calls = message.contains("tool_calls");
             if (has_content) {
                 const auto & content = message.at("content");
@@ -340,7 +297,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                         msg.content_parts.push_back(msg_part);
                     }
                 } else if (!content.is_null()) {
-                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " +
+                                                content.dump() +
+                                                " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                 }
             }
             if (has_tool_calls) {
@@ -360,8 +319,13 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                     if (!fc.contains("name")) {
                         throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                     }
-                    tc.name = fc.at("name");
-                    tc.arguments = fc.at("arguments");
+                    tc.name           = fc.at("name");
+                    const auto & args = fc.at("arguments");
+                    if (args.is_string()) {
+                        tc.arguments = args;
+                    } else {
+                        tc.arguments = args.dump();
+                    }
                     if (tool_call.contains("id")) {
                         tc.id = tool_call.at("id");
                     }
@@ -369,7 +333,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                 }
             }
             if (!has_content && !has_tool_calls) {
-                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+                throw std::invalid_argument(
+                    "Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & "
+                    "https://github.com/ggml-org/llama.cpp/issues/12279)");
             }
             if (message.contains("reasoning_content")) {
                 msg.reasoning_content = message.at("reasoning_content");
@@ -394,7 +360,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
 
 static json render_message_to_json(const std::vector<common_chat_msg> & msgs, const jinja::caps & c) {
     if (!c.supports_string_content && !c.supports_typed_content) {
-        LOG_WRN("%s: Neither string content nor typed content is supported by the template. This is unexpected and may lead to issues.\n", __func__);
+        //LOG_WRN("%s: Neither string content nor typed content is supported by the template. This is unexpected and may lead to issues.\n", __func__);
     }
 
     bool only_string_accepted =  c.supports_string_content && !c.supports_typed_content;
@@ -475,12 +441,13 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
     auto result = json::array();
     for (const auto & tool : tools) {
         result.push_back({
-            {"type", "function"},
-            {"function", {
-                {"name", tool.name},
-                {"description", tool.description},
-                {"parameters", json::parse(tool.parameters)},
-            }},
+            { "type",     "function" },
+            { "function",
+             {
+                  { "name", tool.name },
+                  { "description", tool.description },
+                  { "parameters", json::parse(tool.parameters) },
+              }                      },
         });
     }
     return result;
@@ -498,16 +465,20 @@ json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
         json tool_call;
         tool_call["index"] = diff.tool_call_index;
         if (!diff.tool_call_delta.id.empty()) {
-            tool_call["id"] = diff.tool_call_delta.id;
+            tool_call["id"]   = diff.tool_call_delta.id;
             tool_call["type"] = "function";
         }
-        json function = json::object();
-        if (!diff.tool_call_delta.name.empty()) {
-            function["name"] = diff.tool_call_delta.name;
+        if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
+            json function = json::object();
+            if (!diff.tool_call_delta.name.empty()) {
+                function["name"] = diff.tool_call_delta.name;
+            }
+            if (!diff.tool_call_delta.arguments.empty()) {
+                function["arguments"] = diff.tool_call_delta.arguments;
+            }
+            tool_call["function"] = function;
         }
-        function["arguments"] = diff.tool_call_delta.arguments;
-        tool_call["function"] = function;
-        delta["tool_calls"] = json::array({tool_call});
+        delta["tool_calls"] = json::array({ tool_call });
     }
     return delta;
 }
@@ -516,13 +487,13 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
     if (use_jinja) {
         try {
             common_chat_msg msg;
-            msg.role = "user";
+            msg.role    = "user";
             msg.content = "test";
 
             auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl);
 
             common_chat_templates_inputs inputs;
-            inputs.messages = {msg};
+            inputs.messages = { msg };
 
             common_chat_templates_apply(tmpls.get(), inputs);
             return true;
@@ -531,30 +502,30 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
             return false;
         }
     }
-    llama_chat_message chat[] = {{"user", "test"}};
+    llama_chat_message chat[] = {
+        { "user", "test" }
+    };
     const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
     return res >= 0;
 }
 
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja) {
-
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja) {
     common_chat_templates_inputs inputs;
     inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.add_bos   = tmpls->add_bos;
+    inputs.add_eos   = tmpls->add_eos;
 
     std::string fmt_past_msg;
     if (!past_msg.empty()) {
-        inputs.messages = past_msg;
+        inputs.messages              = past_msg;
         auto & extra = inputs.messages.emplace_back();
         extra.role = new_msg.role;
         inputs.add_generation_prompt = false;
-        fmt_past_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+        fmt_past_msg                 = common_chat_templates_apply(tmpls, inputs).prompt;
     }
     std::ostringstream ss;
     // if the past_msg ends with a newline, we must preserve it in the formatted version
@@ -568,7 +539,7 @@ std::string common_chat_format_single(
     }
     // format chat with new_msg
     inputs.add_generation_prompt = add_ass;
-    auto fmt_new_msg = common_chat_templates_apply(tmpls, inputs).prompt;
+    auto fmt_new_msg             = common_chat_templates_apply(tmpls, inputs).prompt;
     if (fmt_new_msg.size() < fmt_past_msg.size()) {
         LOG_ERR("============================================ Oops: new message is of length %zu, past message is %zu\n", fmt_new_msg.size(), fmt_past_msg.size());
         LOG_ERR("=== past message: <%s>\n", fmt_past_msg.c_str());
@@ -580,31 +551,33 @@ std::string common_chat_format_single(
     return ss.str();
 }
 
-std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs) {
     common_chat_templates_inputs inputs;
-    inputs.use_jinja = use_jinja;
-    inputs.add_bos = tmpls->add_bos;
-    inputs.add_eos = tmpls->add_eos;
+    inputs.use_jinja            = use_jinja;
+    inputs.add_bos              = tmpls->add_bos;
+    inputs.add_eos              = tmpls->add_eos;
     inputs.chat_template_kwargs = chat_template_kwargs;
-    auto add_simple_msg = [&](auto role, auto content) {
+    auto add_simple_msg         = [&](auto role, auto content) {
         common_chat_msg msg;
-        msg.role = role;
+        msg.role    = role;
         msg.content = content;
         inputs.messages.push_back(msg);
     };
-    add_simple_msg("system",    "You are a helpful assistant");
-    add_simple_msg("user",      "Hello");
+    add_simple_msg("system", "You are a helpful assistant");
+    add_simple_msg("user", "Hello");
     add_simple_msg("assistant", "Hi there");
-    add_simple_msg("user",      "How are you?");
+    add_simple_msg("user", "How are you?");
     return common_chat_templates_apply(tmpls, inputs).prompt;
 }
 
-#define CHATML_TEMPLATE_SRC \
-    "{%- for message in messages -%}\n" \
+#define CHATML_TEMPLATE_SRC                                                               \
+    "{%- for message in messages -%}\n"                                                   \
     "  {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \
-    "{%- endfor -%}\n" \
-    "{%- if add_generation_prompt -%}\n" \
-    "  {{- '<|im_start|>assistant\n' -}}\n" \
+    "{%- endfor -%}\n"                                                                    \
+    "{%- if add_generation_prompt -%}\n"                                                  \
+    "  {{- '<|im_start|>assistant\n' -}}\n"                                               \
     "{%- endif -%}"
 
 void common_chat_templates_free(struct common_chat_templates * tmpls) {
@@ -622,19 +595,16 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
                 return tmpls->template_tool_use->source();
             }
             return "";
-        } else {
-            LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
         }
+        LOG_DBG("%s: unknown template variant: %s\n", __func__, variant.c_str());
     }
     return tmpls->template_default->source();
 }
 
-common_chat_templates_ptr common_chat_templates_init(
-    const struct llama_model * model,
-    const std::string & chat_template_override,
-    const std::string & bos_token_override,
-    const std::string & eos_token_override)
-{
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override,
+                                                     const std::string &        eos_token_override) {
     std::string default_template_src;
     std::string template_tool_use_src;
 
@@ -643,7 +613,7 @@ common_chat_templates_ptr common_chat_templates_init(
         GGML_ASSERT(model != nullptr);
         const auto * str = llama_model_chat_template(model, /* name */ nullptr);
         if (str) {
-            default_template_src = str;
+            default_template_src  = str;
             has_explicit_template = true;
         }
         str = llama_model_chat_template(model, /* name */ "tool_use");
@@ -665,34 +635,40 @@ common_chat_templates_ptr common_chat_templates_init(
     // TODO @ngxson : this is a temporary hack to prevent chat template from throwing an error
     // Ref: https://github.com/ggml-org/llama.cpp/pull/15230#issuecomment-3173959633
     if (default_template_src.find("<|channel|>") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("in message.content or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("in message.content or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if \"<|channel|>analysis<|message|>\" in message.content or \"<|channel|>final<|message|>\" in message.content %}",
-            "{%- if false %}");
+                           "{%- if \"<|channel|>analysis<|message|>\" in message.content or "
+                           "\"<|channel|>final<|message|>\" in message.content %}",
+                           "{%- if false %}");
     }
 
     // TODO @aldehir : this is a temporary fix, pending Minja changes
     // Ref: https://github.com/ggml-org/llama.cpp/pull/17713#issuecomment-3631342664
     if (default_template_src.find("[TOOL_CALLS]") != std::string::npos
-            // search for the error message and patch it
-            && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
+        // search for the error message and patch it
+        && default_template_src.find("if (message['content'] is none or") != std::string::npos) {
         string_replace_all(default_template_src,
-            "{%- if (message['content'] is none or message['content'] == '' or message['content']|length == 0) and (message['tool_calls'] is not defined or message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
-            "{%- if false %}");
+                           "{%- if (message['content'] is none or message['content'] == '' or "
+                           "message['content']|length == 0) and (message['tool_calls'] is not defined or "
+                           "message['tool_calls'] is none or message['tool_calls']|length == 0) %}",
+                           "{%- if false %}");
     }
 
     std::string token_bos = bos_token_override;
     std::string token_eos = eos_token_override;
-    bool add_bos = false;
-    bool add_eos = false;
+    bool        add_bos   = false;
+    bool        add_eos   = false;
     if (model) {
-        const auto * vocab = llama_model_get_vocab(model);
-        const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
+        const auto * vocab     = llama_model_get_vocab(model);
+        const auto   get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) {
             if (token == LLAMA_TOKEN_NULL) {
-                if (default_template_src.find(jinja_variable_name) != std::string::npos
-                    || template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
-                    LOG_WRN("common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't work as intended.\n", name);
+                if (default_template_src.find(jinja_variable_name) != std::string::npos ||
+                    template_tool_use_src.find(jinja_variable_name) != std::string::npos) {
+                    LOG_WRN(
+                        "common_chat_templates_init: warning: vocab does not have a %s token, jinja template won't "
+                          "work as intended.\n",
+                        name);
                 }
                 return std::string();
             }
@@ -700,13 +676,13 @@ common_chat_templates_ptr common_chat_templates_init(
         };
         token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token");
         token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token");
-        add_bos = llama_vocab_get_add_bos(vocab);
-        add_eos = llama_vocab_get_add_eos(vocab);
+        add_bos   = llama_vocab_get_add_bos(vocab);
+        add_eos   = llama_vocab_get_add_eos(vocab);
     }
     common_chat_templates_ptr tmpls(new common_chat_templates());
     tmpls->has_explicit_template = has_explicit_template;
-    tmpls->add_bos = add_bos;
-    tmpls->add_eos = add_eos;
+    tmpls->add_bos               = add_bos;
+    tmpls->add_eos               = add_eos;
     try {
         tmpls->template_default = std::make_unique<common_chat_template>(default_template_src, token_bos, token_eos);
     } catch (const std::exception & e) {
@@ -727,35 +703,14 @@ common_chat_templates_ptr common_chat_templates_init(
 
 const char * common_chat_format_name(common_chat_format format) {
     switch (format) {
-        case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
-        case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
-        case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
-        case COMMON_CHAT_FORMAT_MAGISTRAL: return "Magistral";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
-        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
-        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
-        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
-        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
-        case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
-        case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
-        case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
-        case COMMON_CHAT_FORMAT_GPT_OSS: return "GPT-OSS";
-        case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";
-        case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";
-        case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";
-        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";
-        case COMMON_CHAT_FORMAT_MINIMAX_M2: return "MiniMax-M2";
-        case COMMON_CHAT_FORMAT_GLM_4_5: return "GLM 4.5";
-        case COMMON_CHAT_FORMAT_KIMI_K2: return "Kimi K2";
-        case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
-        case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
-        case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
-        case COMMON_CHAT_FORMAT_MIROTHINKER: return "MiroThinker";
-        case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
-        case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
-        case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
+        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
+            return "Content-only";
+        case COMMON_CHAT_FORMAT_PEG_SIMPLE:
+            return "peg-simple";
+        case COMMON_CHAT_FORMAT_PEG_NATIVE:
+            return "peg-native";
+        case COMMON_CHAT_FORMAT_PEG_GEMMA4:
+            return "peg-gemma4";
         default:
             throw std::runtime_error("Unknown chat format");
     }
@@ -763,10 +718,14 @@ const char * common_chat_format_name(common_chat_format format) {
 
 const char * common_reasoning_format_name(common_reasoning_format format) {
     switch (format) {
-        case COMMON_REASONING_FORMAT_NONE:     return "none";
-        case COMMON_REASONING_FORMAT_AUTO:     return "auto";
-        case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
-        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
+        case COMMON_REASONING_FORMAT_NONE:
+            return "none";
+        case COMMON_REASONING_FORMAT_AUTO:
+            return "auto";
+        case COMMON_REASONING_FORMAT_DEEPSEEK:
+            return "deepseek";
+        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY:
+            return "deepseek-legacy";
         default:
             throw std::runtime_error("Unknown reasoning format");
     }
@@ -775,11 +734,14 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
 common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
     if (format == "none") {
         return COMMON_REASONING_FORMAT_NONE;
-    } else if (format == "auto") {
+    }
+    if (format == "auto") {
         return COMMON_REASONING_FORMAT_AUTO;
-    } else if (format == "deepseek") {
+    }
+    if (format == "deepseek") {
         return COMMON_REASONING_FORMAT_DEEPSEEK;
-    } else if (format == "deepseek-legacy") {
+    }
+    if (format == "deepseek-legacy") {
         return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
     }
     throw std::runtime_error("Unknown reasoning format: " + format);
@@ -795,7 +757,8 @@ static void foreach_function(const json & tools, const std::function<void(const
     }
 }
 
-static void foreach_parameter(const json & function, const std::function<void(const std::string &, const json &, bool)> & fn) {
+static void foreach_parameter(const json &                                                         function,
+                              const std::function<void(const std::string &, const json &, bool)> & fn) {
     if (!function.contains("parameters") || !function.at("parameters").is_object()) {
         return;
     }
@@ -803,7 +766,7 @@ static void foreach_parameter(const json & function, const std::function<void(co
     if (!params.contains("properties") || !params.at("properties").is_object()) {
         return;
     }
-    const auto & props = params.at("properties");
+    const auto &          props = params.at("properties");
     std::set<std::string> required;
     if (params.contains("required") && params.at("required").is_array()) {
         params.at("required").get_to(required);
@@ -814,19 +777,19 @@ static void foreach_parameter(const json & function, const std::function<void(co
     }
 }
 
-static std::string apply(
+static std::string common_chat_template_direct_apply_impl(
     const common_chat_template & tmpl,
-    const struct templates_params & inputs,
+    const autoparser::generation_params & inputs,
     const std::optional<json> & messages_override = std::nullopt,
     const std::optional<json> & tools_override = std::nullopt,
-    const std::optional<json> & additional_context = std::nullopt)
-{
+    const std::optional<json> & additional_context = std::nullopt) {
     jinja::context ctx(tmpl.source());
 
     nlohmann::ordered_json inp = nlohmann::ordered_json{
         {"messages", messages_override.has_value() ? *messages_override : inputs.messages},
         {"bos_token", tmpl.bos_token()},
         {"eos_token", tmpl.eos_token()},
+        {"enable_thinking", inputs.enable_thinking},
     };
     if (tools_override.has_value() || !inputs.tools.empty()) {
         inp["tools"] = tools_override.has_value() ? *tools_override : inputs.tools;
@@ -852,7 +815,7 @@ static std::string apply(
     // render
     jinja::runtime runtime(ctx);
     const jinja::value results = runtime.execute(tmpl.prog);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
 
     std::string result = parts->as_string().str();
 
@@ -866,265 +829,14 @@ static std::string apply(
     return result;
 }
 
-static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    auto tool_call_schemas = json::array();
-    foreach_function(inputs.tools, [&](const json & tool) {
-        const auto & function = tool.at("function");
-        auto tool_schema = json {
-            {"type", "object"},
-            {"properties", {
-                {"name", {
-                    {"type", "string"},
-                    {"const", function.at("name")},
-                }},
-                {"arguments", function.at("parameters")},
-            }},
-            {"required", json::array({"name", "arguments"})},
-        };
-        if (function.contains("description")) {
-            tool_schema["description"] = function.at("description");
-        }
-        if (inputs.parallel_tool_calls) {
-            tool_schema.at("properties")["id"] = {
-                {"type", "string"},
-                {"minLength", 4},
-            };
-            tool_schema.at("required").push_back("id");
-        }
-        tool_call_schemas.emplace_back(tool_schema);
-    });
-    const auto tool_call =
-        inputs.parallel_tool_calls
-            ? json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_calls", {
-                        {"type", "array"},
-                        {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                            {"anyOf", tool_call_schemas},
-                        }},
-                        {"minItems", 1},
-                    }},
-                }},
-                {"required", json::array({"tool_calls"})},
-            }
-            : json {
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
-                        {"anyOf", tool_call_schemas},
-                    }},
-                }},
-                {"required", json::array({"tool_call"})},
-            };
-    const auto schema =
-        inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED
-            ? json {
-                {"anyOf", json::array({
-                    tool_call,
-                    {
-                        {"type", "object"},
-                        {"properties", {
-                            {"response", inputs.json_schema.is_null()
-                                ? json {{"type", "string"}}
-                                : inputs.json_schema
-                            },
-                        }},
-                        {"required", json::array({"response"})},
-                    },
-                })}
-            }
-            : tool_call;
-
-    data.grammar_lazy = false;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        builder.add_schema("root", schema);
-    });
-
-    auto tweaked_messages = tmpl.add_system(
-        inputs.messages,
-        "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
-
-    // ensure all messages has "content" field
-    for (auto & message : tweaked_messages) {
-        if (!message.contains("content") || message["content"].is_null()) {
-            message["content"] = "";
-        }
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
-    return data;
-}
-
-static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    // Important note: the model is probably trained to take a JSON stringified arguments value.
-                    // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
-                    {"name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"arguments", function.at("parameters")},
-                    {"id", {
-                        {"type", "string"},
-                        // Nemo's template expects a 9-character alphanumeric ID.
-                        {"pattern", "^[a-zA-Z0-9]{9}$"},
-                    }},
-                }},
-                {"required", json::array({"name", "arguments", "id"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-    });
-    data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-    data.preserved_tokens = {
-        "[TOOL_CALLS]",
-    };
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
-    return data;
-}
-
-
-// Case-insensitive find
-static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
-    auto it = std::search(
-        haystack.begin() + pos, haystack.end(),
-        needle.begin(), needle.end(),
-        [](char a, char b) { return std::tolower(a) == std::tolower(b); }
-    );
-    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);
-}
-
-static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    const auto is_json_schema_provided = !inputs.json_schema.is_null();
-    const auto is_grammar_provided = !inputs.grammar.empty();
-    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();
-
-    // the logic requires potentially modifying the messages
-    auto tweaked_messages = inputs.messages;
-
-    auto replace_json_schema_marker = [](json & messages) -> bool {
-        static std::string marker1 = "force json schema.\n";
-        static std::string marker2 = "force json schema.";
-
-        if (messages.empty() || messages.at(0).at("role") != "system") {
-            return false;
-        }
-
-        std::string content = messages.at(0).at("content");
-
-        for (const auto & marker : {marker1, marker2}) {
-            const auto pos = ifind_string(content, marker);
-            if (pos != std::string::npos) {
-                content.replace(pos, marker.length(), "");
-                // inject modified content back into the messages
-                messages.at(0).at("content") = content;
-                return true;
-            }
-        }
-
-        return false;
-    };
-
-    // Lfm2 model does not natively work with json, but can generally understand the tools structure
-    //
-    // Example of the pytorch dialog structure:
-    //     <|startoftext|><|im_start|>system
-    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>
-    //     <|im_start|>user
-    //     What is the current status of candidate ID 12345?<|im_end|>
-    //     <|im_start|>assistant
-    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>
-    //     <|im_start|>tool
-    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>
-    //     <|im_start|>assistant
-    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>
-    //
-    // For the llama server compatibility with json tools semantic,
-    // the client can add "Follow json schema." line into the system message prompt to force the json output.
-    //
-    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {
-        // server/utils.hpp prohibits that branch for the custom grammar anyways
-        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");
-    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {
-        LOG_INF("%s: Using tools to build a grammar\n", __func__);
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-
-            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");
-        });
-        // model has no concept of tool selection mode choice,
-        // if the system prompt rendered correctly it will produce a tool call
-        // the grammar goes inside the tool call body
-        data.grammar_lazy = true;
-        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;
-    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {
-        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);
-        // output those tokens
-        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};
-    } else if (is_json_schema_provided) {
-        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else if (is_grammar_provided) {
-        LOG_INF("%s: Using provided grammar\n", __func__);
-        data.grammar = inputs.grammar;
-    } else {
-        LOG_INF("%s: Using content relying on the template\n", __func__);
-    }
-
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);
-    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());
-
-    return data;
+std::string common_chat_template_direct_apply(
+    const common_chat_template & tmpl,
+    const autoparser::generation_params & inputs) {
+    return common_chat_template_direct_apply_impl(tmpl, inputs, std::nullopt, std::nullopt, std::nullopt);
 }
 
-static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_ministral_3(const common_chat_template &    tmpl,
+                                                              const autoparser::generation_params & inputs) {
     common_chat_params data;
 
     // Build up messages to follow the format: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512/blob/main/chat_template.jinja
@@ -1142,8 +854,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         // If message contains `reasoning_content`, add it as a block of type `thinking`
         if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
             content.push_back({
-                {"type", "thinking"},
-                {"thinking", msg.at("reasoning_content").get<std::string>()},
+                { "type",     "thinking"                                     },
+                { "thinking", msg.at("reasoning_content").get<std::string>() },
             });
         }
 
@@ -1151,8 +863,8 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         if (msg.contains("content")) {
             if (msg.at("content").is_string()) {
                 content.push_back({
-                    {"type", "text"},
-                    {"text", msg.at("content").get<std::string>()},
+                    { "type", "text"                               },
+                    { "text", msg.at("content").get<std::string>() },
                 });
             } else if (msg.at("content").is_array()) {
                 auto blocks = msg.at("content");
@@ -1160,32 +872,38 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             }
         }
 
-        auto adjusted = msg;
+        auto adjusted       = msg;
         adjusted["content"] = content;
         adjusted.erase("reasoning_content");
         adjusted_messages.push_back(adjusted);
     }
 
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
+    auto has_tools            = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_response_format  = inputs.json_schema.is_object() && !inputs.json_schema.empty();
+    auto extract_reasoning    = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar      = true;
 
-    data.prompt = apply(tmpl, inputs, /* messages_override = */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
+    data.supports_thinking  = true;
+    data.thinking_start_tag = "[THINK]";
+    data.thinking_end_tag   = "[/THINK]";
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs, /* messages_override = */ adjusted_messages);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens  = {
         "[THINK]",
         "[/THINK]",
         "[TOOL_CALLS]",
         "[ARGS]",
     };
 
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto reasoning = extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.prefix(inputs.generation_prompt, "[THINK]");
+        auto reasoning =
+            extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
 
         // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
+        if (has_response_format) {
             // Ministral wants to emit json surrounded by code fences
-            return reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```";
+            return generation_prompt + (reasoning << "```json" << p.content(p.schema(p.json(), "response-format", inputs.json_schema)) << "```");
         }
 
         // Tool call parser
@@ -1193,25 +911,24 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
             auto tool_choice = p.choice();
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                const auto & schema = function.at("parameters");
+                std::string  name     = function.at("name");
+                const auto & schema   = function.at("parameters");
 
-                tool_choice |= p.rule("tool-" + name,
-                    p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]")
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
-                );
+                tool_choice |=
+                    p.rule("tool-" + name, p.tool_open(p.tool_name(p.literal(name)) + "[ARGS]") +
+                                               p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
             });
 
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+            auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
             auto tool_calls = p.trigger_rule("tool-call", p.repeat("[TOOL_CALLS]" + tool_choice, min_calls, max_calls));
 
-            return reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls;
+            return generation_prompt + (reasoning << p.content(p.until("[TOOL_CALLS]")) << tool_calls);
         }
 
         // Content only parser
         include_grammar = false;
-        return reasoning << p.content(p.rest());
+        return generation_prompt + (reasoning << p.content(p.rest()));
     });
 
     data.parser = parser.save();
@@ -1222,1620 +939,841 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
+            }
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]" }
         };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template &    tmpl,
+                                                          const autoparser::generation_params & inputs) {
     common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
-    data.preserved_tokens = {
-        "[THINK]",
-        "[/THINK]",
-    };
 
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                        {"id", {
-                            {"type", "string"},
-                            {"pattern", "^[a-zA-Z0-9]{9}$"},
-                        }},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[TOOL_CALLS]"});
-        data.preserved_tokens.push_back("[TOOL_CALLS]");
-    } else {
-        data.grammar_lazy = false;
-        if (!inputs.json_schema.is_null()) {
-            if (!inputs.grammar.empty()) {
-                throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
+    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
+    auto adjusted_messages = json::array();
+    for (auto msg : inputs.messages) {
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
+            msg["thinking"] = msg.at("reasoning_content");
+            if (msg.contains("tool_calls") && msg.at("tool_calls").is_array() && !msg.at("tool_calls").empty()) {
+                msg.erase("content");
             }
-            data.grammar = json_schema_to_grammar(inputs.json_schema);
-        } else {
-            data.grammar = inputs.grammar;
         }
+        adjusted_messages.push_back(msg);
     }
 
-    return data;
-}
-
-static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
+    auto prompt = common_chat_template_direct_apply_impl(tmpl, inputs, /* messages_override= */ adjusted_messages);
 
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-        if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["tool_plan"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-    data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
-    if (string_ends_with(data.prompt, "<|START_THINKING|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|END_THINKING|>";
-        } else {
-            data.thinking_forced_open = true;
+    // Check if we need to replace the return token with end token during
+    // inference and without generation prompt. For more details see:
+    // https://github.com/ggml-org/llama.cpp/issues/15417
+    if (inputs.is_inference && !inputs.add_generation_prompt) {
+        static constexpr std::string_view return_token = "<|return|>";
+        static constexpr std::string_view end_token    = "<|end|>";
+        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
+            prompt.replace(pos, return_token.length(), end_token);
         }
-    } else if (!inputs.enable_thinking && string_ends_with(data.prompt, "<|CHATBOT_TOKEN|>")) {
-        data.prompt += "<|START_THINKING|><|END_THINKING|>";
     }
 
-    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-        auto schemas = json::array();
-        foreach_function(inputs.tools, [&](const json & tool) {
-            const auto & function = tool.at("function");
-            schemas.push_back({
-                {"type", "object"},
-                {"properties", {
-                    {"tool_call_id", {
-                        {"type", "string"},
-                        // Command-R's template expects an integer string.
-                        {"pattern", "^[0-9]{1,10}$"},
-                    }},
-                    {"tool_name", {
-                        {"type", "string"},
-                        {"const", function.at("name")},
-                    }},
-                    {"parameters", function.at("parameters")},
-                }},
-                {"required", json::array({"tool_call_id", "tool_name", "parameters"})},
-            });
-        });
-        auto schema = json {
-            {"type", "array"},
-            {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-            {"minItems", 1},
-        };
-        if (!inputs.parallel_tool_calls) {
-            schema["maxItems"] = 1;
-        }
-        builder.add_rule("root",
-            std::string(data.thinking_forced_open ? "( \"<|END_THINKING|>\" space )? " : "") +
-            "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
-    });
-    data.grammar_triggers.push_back({
-        COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-        // If thinking_forced_open, then we capture the </think> tag in the grammar,
-        // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-        std::string(data.thinking_forced_open ? "[\\s\\S]*?(<\\|END_THINKING\\|>\\s*)" : "(?:<\\|START_THINKING\\|>[\\s\\S]*?<\\|END_THINKING\\|>\\s*)?") +
-            "(<\\|START_ACTION\\|>)[\\s\\S]*"
-    });
+    data.prompt            = prompt;
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+
+    // These special tokens are required to parse properly, so we include them
+    // even if parse_tool_calls is false.
     data.preserved_tokens = {
-        "<|START_ACTION|>",
-        "<|END_ACTION|>",
-        "<|START_RESPONSE|>",
-        "<|END_RESPONSE|>",
-        "<|START_THINKING|>",
-        "<|END_THINKING|>",
+        "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
     };
-    return data;
-}
 
-static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
-    if (!parameters.is_object() || !parameters.contains("type") || parameters.at("type") != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
-        throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
-    }
-    const auto & parameters_properties = parameters.at("properties");
-    const auto & parameters_required = parameters.at("required");
-    for (const auto & prop : expected_properties) {
-        if (!parameters_properties.contains(prop)) {
-            throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); // NOLINT
-        }
-        if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
-            throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); // NOLINT
+    auto has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
+    auto include_grammar     = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
+    auto extract_reasoning   = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto start           = p.rule("start", p.literal("<|start|>assistant"));
+        auto end             = p.rule("end", p.literal("<|end|>"));
+        auto content         = p.rule("message-content", p.until("<|end|>"));
+        auto channel         = p.literal("<|channel|>") + (p.literal("commentary") | p.literal("analysis"));
+        auto constrain_type  = p.chars("[A-Za-z0-9_-]", 1, -1);
+
+        // Occasionally, gpt-oss-20b will prefix channels with this commentary
+        auto stray_commentary = p.optional(p.literal("<|channel|>commentary") + p.optional(p.literal(" to=assistant")));
+        auto start_analysis = stray_commentary + p.literal("<|channel|>analysis<|message|>");
+
+        if (extract_reasoning) {
+            p.rule("analysis", start_analysis + p.reasoning(content) + end);
+        } else {
+            p.rule("analysis", p.content(start_analysis + content + end));
         }
-    }
-    if (parameters_properties.size() != expected_properties.size()) {
-        throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
-    }
-}
 
-static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
-    auto builtin_tools = json::array();
-    common_chat_params data;
-    if (!inputs.tools.is_null()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-
-            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
-                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
-                    expect_tool_parameters(name, parameters, {"query"});
-                } else if (name == "python" || name == "code_interpreter") {
-                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
-                    expect_tool_parameters(name, parameters, {"code"});
-                } else {
-                    return false;
-                }
+        auto analysis = p.ref("analysis");
+        auto preamble = p.rule("preamble", p.literal("<|channel|>commentary<|message|>") + p.content(content) + end);
+        auto final_msg = p.rule("final", stray_commentary + p.literal("<|channel|>final<|message|>") + p.content(content));
 
-                std::vector<std::string> kvs;
-                for (const auto & [key, value] : parameters.at("properties").items()) {
-                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
-                }
+        // Consume any unsolicited tool calls, e.g. builtin functions
+        auto unsolicited = p.rule("unsolicited", p.atomic(p.optional(channel) + p.literal(" to=") + content + end));
 
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
-                builtin_tools.push_back(name);
+        auto any = p.rule("any", preamble | analysis);
 
-                return true;
-            };
+        if (has_response_format) {
+            auto constraint = p.optional(p.space() + p.optional(p.literal("<|constrain|>")) + constrain_type);
+            auto response_format = p.rule("response-format",
+                p.literal("<|channel|>final") + constraint + p.literal("<|message|>") +
+                p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
+
+            return p.zero_or_more(start + analysis) + start + response_format;
+        }
+
+        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            auto tool_choice = p.choice();
 
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
+                std::string  name     = function.at("name");
+                const auto & params   = function.at("parameters");
 
-                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
-                if (allow_python_tag_builtin_tools) {
-                    handle_builtin_tool(name, parameters);
-                }
-                tool_rules.push_back(
-                    builder.add_rule(
-                        name + "-call",
-                        "\"{\" space "
-                        "( \"\\\"type\\\"\"       space \":\" space \"\\\"function\\\"\"     space \",\" space )? "
-                        "  \"\\\"name\\\"\"       space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
-                        "  \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
-                        "\"}\" space"));
-            });
-            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "(\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\")[\\s\\S]*", // + name + "\"[\\s\\S]*",
+                auto func_name  = p.literal(" to=functions.") + p.tool_name(p.literal(name));
+                auto constraint = p.optional(p.space() + p.optional(p.literal("<|constrain|>")) + constrain_type);
+                auto args       = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
+
+                // recipient in role header
+                //   <|start|>assistant to=functions.NAME<|channel|>(commentary|analysis)[constraint]<|message|>ARGS
+                auto tool_in_role = p.tool(p.tool_open(func_name + channel + constraint + p.literal("<|message|>")) + args);
+
+                // recipient in channel header
+                //   <|channel|>(commentary|analysis) to=functions.NAME[constraint]<|message|>ARGS
+                auto tool_in_channel = p.tool(p.tool_open(channel + func_name + constraint + p.literal("<|message|>")) + args);
+
+                tool_choice |= p.rule("tool-" + name, tool_in_role | tool_in_channel);
             });
-            if (!builtin_tools.empty()) {
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            // Allow a few empty lines on top of the usual constrained json schema space rule.
-            builder.add_rule("root", string_join(tool_rules, " | "));
-            data.additional_stops.push_back("<|eom_id|>");
-        });
-        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
-            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
-            : COMMON_CHAT_FORMAT_LLAMA_3_X;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json {
-        {"date_string", format_time(inputs.now, "%d %b %Y")},
-        {"tools_in_user_message", false},
-        {"builtin_tools", builtin_tools},
-    });
-    return data;
-}
 
-static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
+            auto tool_call  = p.trigger_rule("tool-call", tool_choice);
 
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_NEMOTRON_V2;
+            if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+                return p.zero_or_more(start + any) + start + tool_call;
+            }
 
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+            return p.zero_or_more(start + any) + start + (tool_call | final_msg);
         }
-    }
 
-    // When tools are present, build grammar for the <TOOLCALL> format, similar to CommandR, but without tool call ID
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
+        return p.zero_or_more(start + any) + start + (final_msg | unsolicited);
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
         data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { "name",
-                            {
-                                { "type", "string" },
-                                { "const", function.at("name") },
-                            } },
-                            { "arguments", function.at("parameters") },
-                        }                                                                        },
-                    { "required",   json::array({ "name", "arguments" }) },
-                });
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
             });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
             }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                                    "\"<TOOLCALL>\" " + builder.add_schema("tool_calls", schema) +
-                                    " \"</TOOLCALL>\"");
+            parser.build_grammar(builder, data.grammar_lazy);
         });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the </think> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(</think>\\s*)" :
-                            "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                "(<TOOLCALL>)[\\s\\S]*" });
+
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^\\s+to$" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^<\\|channel\\|>(?:commentary|analysis)\\s+to=functions$" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "<\\|start\\|>assistant(\\s+to)" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "<\\|start\\|>assistant(<\\|channel\\|>(?:commentary|analysis)\\s+to)" }
+        };
     }
+
     return data;
 }
 
-static common_chat_params common_chat_params_init_qwen3_coder(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_gemma4(const common_chat_template &    tmpl,
+                                                         const autoparser::generation_params & inputs) {
     common_chat_params data;
 
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_PEG_CONSTRUCTED;
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
 
-    // Nemotron Nano 3 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
-    bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
-
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
+    if (inputs.add_generation_prompt && string_ends_with(data.prompt, "<turn|>\n")) {
+        // This may happen if the model generates content + tool_call, the
+        // template does not add the model's next turn and confuses the model
+        // from emitting its proper reasoning token sequence.
+        data.prompt += "<|turn>model\n";
     }
 
+    data.format            = COMMON_CHAT_FORMAT_PEG_GEMMA4;
+    data.supports_thinking  = true;
+    data.thinking_start_tag = "<|channel>thought";
+    data.thinking_end_tag   = "<channel|>";
+
     data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
+        "<|channel>",
+        "<channel|>",
+        "<|tool_call>",
+        "<tool_call|>",
+        "<|turn>",
     };
 
-    if (supports_reasoning) {
-        data.preserved_tokens.insert(data.preserved_tokens.end(), {"<think>", "</think>"});
-    }
+    auto has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
+    auto include_grammar     = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
+    auto extract_reasoning   = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
 
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
-    auto include_grammar = true;
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto start = p.rule("start", p.prefix(inputs.generation_prompt, "<|channel>"));
 
-    auto parser = build_chat_peg_constructed_parser([&](auto & p) {
-        auto reasoning = p.eps();
-        if (supports_reasoning && inputs.enable_thinking && extract_reasoning) {
-            auto reasoning_content = p.reasoning(p.until("</think>")) + ("</think>" | p.end());
-            if (data.thinking_forced_open) {
-                reasoning = reasoning_content;
-            }
+        if (extract_reasoning) {
+            p.rule("thought", p.literal("<|channel>thought") + p.space() + p.reasoning(p.until("<channel|>")) + p.literal("<channel|>"));
+        } else {
+            p.rule("thought", p.content(p.literal("<|channel>thought") + p.space() + p.until("<channel|>") + p.literal("<channel|>")));
         }
 
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            return reasoning << p.content(p.schema(p.json(), "response-format", inputs.json_schema));
+        auto consume_empty_channels = p.gbnf(p.zero_or_more(p.literal("<|channel>") + p.negate(p.literal("thought"))), "");
+        auto thought = (p.peek(p.literal("<|channel>")) + consume_empty_channels + p.ref("thought")) | p.negate(p.literal("<|channel>"));
+
+        if (has_response_format) {
+            auto response_format = p.literal("```json") <<
+                p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)) <<
+                p.literal("```");
+            return start + p.optional(thought) + response_format;
         }
 
-        // Tool call parser
         if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
+            // Gemma4 tool calling syntax
+            // Rules should match traversal logic in gemma4_to_json()
+            p.rule("gemma4-string-content", p.until("<|\"|>"));
+            p.rule("gemma4-string", p.literal("<|\"|>") + p.ref("gemma4-string-content") + p.literal("<|\"|>"));
+            p.rule("gemma4-bool", p.json_bool());
+            p.rule("gemma4-null", p.json_null());
+            p.rule("gemma4-number", p.json_number());
+            p.rule("gemma4-dict-key", p.rule("gemma4-dict-key-name", p.chars("[^:}]", 1, -1)) + p.literal(":"));
+            p.rule("gemma4-dict-kv", p.ref("gemma4-dict-key") + p.space() + p.ref("gemma4-value"));
+            p.rule("gemma4-dict", [&]() {
+                auto ws = p.space();
+                auto member = p.ref("gemma4-dict-kv");
+                auto members = p.sequence({member, p.zero_or_more(p.sequence({p.literal(","), ws, member}))});
+                return p.sequence({
+                    p.literal("{"), ws,
+                    p.choice({p.literal("}"), p.sequence({members, ws, p.literal("}")})})
+                });
+            });
+            p.rule("gemma4-array", [&]() {
+                auto ws = p.space();
+                auto value = p.ref("gemma4-value");
+                auto elements = p.sequence({value, p.zero_or_more(p.sequence({p.literal(","), ws, value}))});
+                return p.sequence({
+                    p.literal("["), ws,
+                    p.choice({p.literal("]"), p.sequence({elements, ws, p.literal("]")})})
+                });
+            });
+            p.rule("gemma4-value", [&]() {
+                return p.choice({
+                    p.ref("gemma4-string"), p.ref("gemma4-dict"), p.ref("gemma4-array"),
+                    p.ref("gemma4-number"), p.ref("gemma4-bool"), p.ref("gemma4-null")
+                });
+            });
+
             auto tool_choice = p.choice();
+
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
+                std::string  name     = function.at("name");
+                // TODO @aldehir : need to extend json-schema-to-grammar to produce more than JSON rules
+                // const auto & params   = function.at("parameters");
 
-                auto schema_info = common_schema_info();
-                schema_info.resolve_refs(parameters);
+                tool_choice |= p.rule("tool-" + name, p.tool(p.sequence({
+                    p.tool_open(p.tool_name(p.literal(name)) + p.peek(p.literal("{"))),
+                    p.tool_args(p.ref("gemma4-dict")),
+                })));
+            });
 
-                auto tool_open = "<function=" + p.tool_name(p.literal(name)) + ">\n";
-                auto tool_close = p.literal("</function>\n");
-                auto args = p.sequence();
-                auto arg_string = p.rule("xml-arg-string", p.until_one_of({
-                    "\n</parameter>",
-                    "\n<parameter=",
-                    "\n</function>"
-                }));
+            auto tool_call = p.trigger_rule("tool-call", p.repeat(
+                "<|tool_call>call:" + tool_choice + "<tool_call|>",
+                /* min = */ inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0,
+                /* max = */ inputs.parallel_tool_calls ? -1 : 1
+            ));
 
-                foreach_parameter(function, [&](const auto & param_name, const json & param_schema, bool is_required) {
-                    auto rule_name = "tool-" + name + "-arg-" + param_name;
+            auto scan_to_toolcall = p.rule("scan-to-toolcall", p.until("<|tool_call>"));
+            auto content = p.rule("content", p.content(p.until_one_of({"<|channel>", "<channel|>", "<|tool_call>"})));
+            auto message = p.rule("message", thought + content);
+            return start + p.zero_or_more(message) + scan_to_toolcall + tool_call;
+        }
 
-                    auto arg_open = "<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">\n";
-                    auto arg_close = p.literal("</parameter>\n");
-                    auto arg_value = p.eps();
-
-                    if (schema_info.resolves_to_string(param_schema)) {
-                        arg_value = p.tool_arg_string_value(arg_string) + "\n";
-                    } else {
-                        arg_value = p.tool_arg_json_value(p.schema(p.json(), rule_name + "-schema", param_schema));
-                    }
-
-                    // Model may or my not close with </parameter>
-                    auto arg_rule = p.rule(rule_name, p.tool_arg_open(arg_open) + arg_value + p.optional(p.tool_arg_close(arg_close)));
-                    args += p.repeat(arg_rule, /* min = */ is_required ? 1 : 0, /* max = */ 1);
-                });
-
-                tool_choice |= p.rule("tool-" + name, p.tool_open(tool_open) + args + p.tool_close(tool_close));
-            });
-
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-            auto tool_call = p.rule("tool-call", "<tool_call>\n" + tool_choice + "</tool_call>" + p.space());
-            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
-
-            return reasoning << p.content(p.until("<tool_call>")) << tool_calls;
-        }
-
-        // Content only parser
-        include_grammar = false;
-        return reasoning << p.content(p.rest());
+        // Gemma 4 may emit an extra <|channel>thought\n<channel|> at the end of the content. It may
+        // also emit a single trailing <channel|> token. Consume all complete reasoning blocks and
+        // then stop at the first unmatched <channel|> token.
+        auto content = p.rule("content", p.content(p.until_one_of({"<|channel>", "<channel|>"})));
+        auto message = p.rule("message", thought + content);
+        return start + p.one_or_more(message);
     });
 
     data.parser = parser.save();
 
     if (include_grammar) {
-        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
-
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+        data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                auto schema = function.at("parameters");
+                auto         schema   = function.at("parameters");
                 builder.resolve_refs(schema);
             });
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
+            }
             parser.build_grammar(builder, data.grammar_lazy);
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>"}
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call>" },
         };
     }
 
     return data;
 }
 
-
-static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// Functionary v3.2 - uses recipient-based format: >>>recipient\n{content}
+static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template &    tmpl,
+                                                                   const autoparser::generation_params & inputs) {
     common_chat_params data;
 
-    // Generate the prompt using the apply() function with the template
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_APERTUS;
+    data.prompt           = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format           = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.preserved_tokens = {
+        ">>>all",
+    };
 
-    // Handle thinking tags appropriately based on inputs.enable_thinking
-    if (string_ends_with(data.prompt, "<|inner_prefix|>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "<|inner_suffix|>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
-    // When tools are present, build grammar for the <|tools_prefix|> format
-    if (!inputs.tools.is_null() && inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = true;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    { "type",       "object"                                                   },
-                    { "properties",
-                        {
-                            { function.at("name"), function.at("parameters") }
-                        }                                                                        },
-                    { "required",   json::array({ function.at("name") }) },
-                });
-            });
-            auto schema = json{
-                        { "type",     "array"                                                         },
-                        { "items",    schemas.size() == 1 ? schemas[0] : json{ { "anyOf", schemas } } },
-                        { "minItems", 1                                                               },
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root",
-                                std::string(data.thinking_forced_open ? "( \"<|inner_suffix|>\" space )? " : "") +
-                                    "\"<|tools_prefix|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tools_suffix|>\"");
-                            });
-        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-            // If thinking_forced_open, then we capture the <|inner_suffix|> tag in the grammar,
-            // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-            std::string(data.thinking_forced_open ?
-                            "[\\s\\S]*?(<\\|inner_suffix\\|>\\s*)" :
-                            "(?:<\\|inner_prefix\\|>[\\s\\S]*?<\\|inner_suffix\\|>\\s*)?") +
-                "(<\\|tools_prefix\\|>)[\\s\\S]*" });
-        data.preserved_tokens = {
-            "<|system_start|>",
-            "<|system_end|>",
-            "<|developer_start|>",
-            "<|developer_end|>",
-            "<|user_start|>",
-            "<|user_end|>",
-            "<|assistant_start|>",
-            "<|assistant_end|>",
-            "<|inner_prefix|>",
-            "<|inner_suffix|>",
-            "<|tools_prefix|>",
-            "<|tools_suffix|>",
-        };
-    }
-    return data;
-}
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Functionary v3.2 format:
+        // - Normal content: >>>all\n{content}
+        // - Tool calls: >>>function_name\n{json_args}
+        // Generation prompt ends with ">>>" so model outputs recipient immediately
 
-static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    auto prompt = apply(tmpl, inputs);
-
-    // Hacks to fix the official (broken) prompt.
-    // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead,
-    // until the official template is fixed.
-    if (tmpl.source().find("{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}") != std::string::npos) {
-        // Don't leave the chat dangling after tool results
-        if (string_ends_with(prompt, "<｜tool▁outputs▁end｜>")) {
-            prompt += "<｜end▁of▁sentence｜>";
-            if (inputs.add_generation_prompt) {
-                prompt += "<｜Assistant｜>";
-            }
-        }
-        // Fix up tool call delta example added by Minja
-        prompt = std::regex_replace(
-            prompt,
-            std::regex("(<｜tool▁call▁end｜>)[\\s\\r\\n]*(<｜tool▁outputs▁begin｜>|<｜User｜>)"),
-            "$1<｜tool▁calls▁end｜><｜end▁of▁sentence｜>$2");
-    }
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+        // Build content parser for >>>all\n{content}
+        // When tools are present, content stops before the next ">>>" (tool call)
+        // When no tools, content goes until end
+        auto content_until_tool = p.literal("all\n") + p.content(p.until(">>>"));
+        auto content_until_end  = p.literal("all\n") + p.content(p.rest());
+        auto generation_prompt  = p.literal(inputs.generation_prompt);
+
+        // If no tools or tool_choice is NONE, just parse content
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            // When no tools, just match the prefix and capture everything after
+            return generation_prompt + content_until_end + p.end();
         }
-    }
 
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"function<｜tool▁sep｜>" + name + "\\n"
-                    "```json\\n\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"```<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜",
-            };
-        });
-    }
-    return data;
-}
+        // Build tool call parsers for each available function
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
 
-static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
+            // Tool format: >>>function_name\n{json_args}
+            auto tool_parser = p.tool(
+                p.tool_open(p.tool_name(p.literal(name)) + p.literal("\n")) +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema))
+            );
 
-    // Pass thinking context for DeepSeek V3.1 template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
+            tool_choice |= p.rule("tool-" + name, tool_parser);
+        });
 
-    auto prompt = apply(tmpl, inputs,
-                       /* messages_override= */ inputs.messages,
-                       /* tools_override= */ std::nullopt,
-                       additional_context);
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
-    if (string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
+        auto content_only = content_until_end;
+        auto tools_only = p.trigger_rule("tools", p.one_or_more(tool_choice));
+        auto content_and_tools = content_until_tool + tools_only;
+
+        auto ret = p.eps();
+        if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
+            if (inputs.parallel_tool_calls) {
+                ret = p.choice({ content_and_tools, tools_only }) + p.end();
+            } else {
+                ret = p.choice({ content_until_tool + tool_choice, tools_only }) + p.end();
+            }
+        } else if (inputs.parallel_tool_calls) {
+            ret = p.choice({ content_and_tools, content_only, tools_only }) + p.end();
         } else {
-            data.thinking_forced_open = true;
+            auto content_and_tool = content_until_tool + tool_choice;
+            ret = p.choice({ content_and_tool, content_only, tool_choice }) + p.end();
         }
-    }
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
+        return generation_prompt + ret;
+    });
+
+    data.parser = parser.save();
+
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+
         data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                    "( \"<｜tool▁call▁begin｜>\" )? \"" + name + "<｜tool▁sep｜>"
-                    "\" " + builder.add_schema(name + "-args", parameters) + " "
-                    "\"<｜tool▁call▁end｜>\""));
-            });
-            // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
-            // so we accept common variants (then it's all constrained)
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                "( \"<｜tool▁calls▁begin｜>\" | \"<｜tool_calls_begin｜>\" | \"<｜tool calls begin｜>\" | \"<｜tool\\\\_calls\\\\_begin｜>\" | \"<｜tool▁calls｜>\" ) "
-                "(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
-                "\"<｜tool▁calls▁end｜>\""
-                " space");
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
-                    "(<｜tool▁calls▁begin｜>|<｜tool_calls_begin｜>|<｜tool calls begin｜>|<｜tool\\\\_calls\\\\_begin｜>|<｜tool▁calls｜>)[\\s\\S]*"
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
             });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<｜tool▁calls▁begin｜>",
-                "<｜tool▁call▁begin｜>",
-                "<｜tool▁sep｜>",
-                "<｜tool▁call▁end｜>",
-                "<｜tool▁calls▁end｜>",
-            };
+            parser.build_grammar(builder, data.grammar_lazy);
         });
-    }
-    return data;
-}
 
-static common_chat_params common_chat_params_init_minimax_m2(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_MINIMAX_M2;
-
-    // Handle thinking tags based on prompt ending
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            // Close the thinking tag immediately if thinking is disabled
-            data.prompt += "</think>\n\n";
-        } else {
-            // Mark thinking as forced open (template started with <think>)
-            data.thinking_forced_open = true;
-        }
+        // Grammar trigger for when the model starts outputting a tool call
+        // (after the initial ">>>" in the generation prompt but recipient other than "all")
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, ">>>(?!all)" }
+        };
     }
 
-    // Preserve MiniMax-M2 special tokens
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
-        "<minimax:tool_call>",
-        "</minimax:tool_call>",
-    };
-
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "<minimax:tool_call>\n",
-        /* form.tool_start  = */ "<invoke name=\"",
-        /* form.tool_sep    = */ "\">\n",
-        /* form.key_start   = */ "<parameter name=\"",
-        /* form.key_val_sep = */ "\">",
-        /* form.val_end     = */ "</parameter>\n",
-        /* form.tool_end    = */ "</invoke>\n",
-        /* form.scope_end   = */ "</minimax:tool_call>",
-    };
-    build_grammar_xml_tool_call(data, params.tools, form);
-
     return data;
 }
 
-static common_chat_params common_chat_params_init_qwen3_coder_xml(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_QWEN3_CODER_XML;
-
-    // Qwen3.5 and Step-3.5-Flash use the Qwen3 Coder tool calling with thinking
-    bool supports_reasoning = (tmpl.source().find("<think>") != std::string::npos);
-
-    if (supports_reasoning && string_ends_with(data.prompt, "<think>\n")) {
-        if (!params.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-        "<function=",
-        "</function>",
-        "<parameter=",
-        "</parameter>",
-    };
-
-    if (supports_reasoning) {
-        data.preserved_tokens.insert(data.preserved_tokens.end(), { "<think>", "</think>" });
-    }
-
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form{};
-        form.scope_start = "";
-        form.tool_start = "\n<tool_call>\n<function=";
-        form.tool_sep = ">\n";
-        form.key_start = "<parameter=";
-        form.key_val_sep = ">\n";
-        form.val_end = "\n</parameter>\n";
-        form.tool_end = "</function>\n</tool_call>";
-        form.scope_end = "";
-        form.relax_arg = true;
-        return form;
-        })();
-        build_grammar_xml_tool_call(data, params.tools, form);
-
-        return data;
-}
-
-static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template & tmpl, const struct templates_params & params) {
+// Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+// The ID contains both the function name and an incrementing counter
+static common_chat_params common_chat_params_init_kimi_k2(const common_chat_template &    tmpl,
+                                                          const autoparser::generation_params & inputs) {
     common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_KIMI_K2;
 
-    data.preserved_tokens = {
-        "<think>",
-        "</think>",
+    data.prompt             = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format             = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking  = true;
+    data.preserved_tokens  = {
         "<|tool_calls_section_begin|>",
+        "<|tool_calls_section_end|>",
         "<|tool_call_begin|>",
         "<|tool_call_argument_begin|>",
         "<|tool_call_end|>",
-        "<|tool_calls_section_end|>",
-        "<|im_end|>",
-        "<|im_system|>",
-        "<|im_middle|>",
+        "<think>",
+        "</think>",
     };
 
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|im_end|>",
-        "<|im_middle|>"
-    });
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<|tool_calls_section_begin|>";
-        form.tool_start  = "<|tool_call_begin|>";
-        form.tool_sep    = "<|tool_call_argument_begin|>{";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}<|tool_call_end|>";
-        form.scope_end   = "<|tool_calls_section_end|>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
-
-    return data;
-}
-
-static common_chat_params common_chat_params_init_apriel_1_5(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_APRIEL_1_5;
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    const std::string SECTION_BEGIN = "<|tool_calls_section_begin|>";
+    const std::string SECTION_END   = "<|tool_calls_section_end|>";
+    const std::string CALL_BEGIN    = "<|tool_call_begin|>";
+    const std::string ARGS_BEGIN    = "<|tool_call_argument_begin|>";
+    const std::string CALL_END      = "<|tool_call_end|>";
+
+    const std::string THINK_START = "<think>";
+    const std::string THINK_END   = "</think>";
+
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag   = THINK_END;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Kimi K2 Thinking format:
+        // - Reasoning: <think>{reasoning}</think>
+        // - Content: text after reasoning
+        // - Tool calls section:
+        //   <|tool_calls_section_begin|>
+        //   <|tool_call_begin|>functions.<name>:<index><|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+        //   ...
+        //   <|tool_calls_section_end|>
+        // The ID format is: functions.<function_name>:<counter> where counter is 0, 1, 2, ...
+
+        // Tool call markers
+        auto end = p.end();
+
+        // Note: this model is CRAZY. It can diverge from its supposed tool calling pattern in so many ways it's not funny.
+        // For example, it can call tools at the end of reasoning without closing reasoning...
+        auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning(
+            p.until_one_of({ THINK_END, "<|tool_calls_section_begin|>", "<|tool_call_begin|>" })) +
+            p.optional(p.literal(THINK_END))) : p.eps();
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+
+
+        // Content only parser (no tools)
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
+        }
+
+        // Build tool call parsers for each available function
+        // The ID format is: functions.<name>:<index>
+        // We need to match: functions.<name>:<digits>
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            const auto & schema   = function.at("parameters");
+
+            // Match: functions.<name>:<digits>
+            // Capture the full call id (functions.<name>:<digits>) using tool_id tag
+            auto tool_id = p.tool_id(p.literal("functions.") + p.tool_name(p.literal(name)) + p.literal(":") + p.chars("[0-9]", 1, -1));
+            auto tool_parser = p.tool(
+                p.tool_open(tool_id + p.literal(ARGS_BEGIN)) +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) +
+                p.tool_close(p.optional((p.literal(CALL_END))))
+            );
 
-    data.preserved_tokens = {
-        "<thinking>",
-        "</thinking>",
-        "<tool_calls>",
-        "</tool_calls>",
-    };
+            tool_choice |= p.rule("tool-" + name, tool_parser);
+        });
 
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "<tool_calls>[";
-        form.tool_start  = "{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}, ";
-        form.scope_end   = "]</tool_calls>";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        form.last_tool_end = "}";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
+        // Tool calls section: <|tool_calls_section_begin|> tool_calls <|tool_calls_section_end|>
+        auto min_calls  = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+        auto max_calls  = inputs.parallel_tool_calls ? -1 : 1;
+        // Use trigger_rule so grammar generator knows where to start generating rules
+        auto tool_calls = p.rule("tool-calls",
+            p.optional(p.literal(SECTION_BEGIN)) +
+            p.trigger_rule("tool-call", p.repeat(CALL_BEGIN + tool_choice, min_calls, max_calls) +
+                p.optional(p.literal(SECTION_END)))
+        );
 
-    return data;
-}
+        auto content_before_tools = p.content(p.until_one_of({ SECTION_BEGIN, CALL_BEGIN }));
 
-static common_chat_params common_chat_params_init_xiaomi_mimo(const common_chat_template & tmpl, const struct templates_params & params) {
-    common_chat_params data;
-    data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        return generation_prompt + reasoning + content_before_tools + tool_calls + end;
+    });
 
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_XIAOMI_MIMO;
+    data.parser = parser.save();
 
-    data.preserved_tokens = {
-        "<tool_call>",
-        "</tool_call>",
-    };
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
+            });
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
 
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form {};
-        form.scope_start = "\n";
-        form.tool_start  = "<tool_call>\n{\"name\": \"";
-        form.tool_sep    = "\", \"arguments\": {";
-        form.key_start   = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end     = ", ";
-        form.tool_end    = "}\n</tool_call>";
-        form.scope_end   = "";
-        form.raw_argval  = false;
-        form.last_val_end = "";
-        return form;
-    })();
-    build_grammar_xml_tool_call(data, params.tools, form);
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_call_begin|>" }
+        };
+    }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_mirothinker(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// MiroThinker - uses MCP style toolcalling
+static common_chat_params common_chat_params_init_mirothinker(const common_chat_template & tmpl,
+    const autoparser::generation_params & inputs) {
     common_chat_params data;
-    //data.grammar_lazy = params.tools.is_array() && !params.tools.empty() && params.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    //// Disable every Minja polyfill
-    //minja::chat_template_options topts;
-    //topts.apply_polyfills = true;
-    //topts.polyfill_tools = false;
-    //topts.polyfill_tool_call_examples = false;
-    //topts.polyfill_tool_calls = false;
-    //topts.polyfill_tool_responses = false;
-    //topts.polyfill_system_role = false;
-    //topts.polyfill_object_arguments = true;
-    //topts.polyfill_typed_content = false;
-    //topts.use_bos_token = true;
-    //topts.use_eos_token = true;
-
-    //data.prompt = apply(tmpl, params, std::nullopt, std::nullopt, std::nullopt, topts);
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_MIROTHINKER;
 
+    data.prompt = common_chat_template_direct_apply(tmpl, inputs);
+    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.thinking_start_tag = "<think>";
+    data.thinking_end_tag = "</think>";
     data.preserved_tokens = {
-        "<use_mcp_tool>", "</use_mcp_tool>",
-        "<server_name>", "</server_name>",
-        "<tool_name>", "</tool_name>",
-        "<arguments>", "</arguments>",
+        "<think>",
+        "</think>",
     };
 
-    // build grammar for tool call
-    static const xml_tool_call_format form = ([]() {
-        xml_tool_call_format form{};
-        form.scope_start = "<use_mcp_tool>\n";
-        form.tool_start = "<server_name>\n";
-        form.tool_sep = "</tool_name>\n<arguments>\n{";
-        form.key_start = "\"";
-        form.key_val_sep = "\": ";
-        form.val_end = ", ";
-        form.tool_end = "}\n</arguments>";
-        form.scope_end = "</use_mcp_tool>";
-        form.raw_argval = false;
-        form.last_val_end = "";
-        return form;
-        })();
-        build_grammar_xml_tool_call(data, inputs.tools, form);
-
-        return data;
-}
-
-static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
-        auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
-
-        if (has_reasoning_content && has_tool_calls) {
-            auto adjusted_message = msg;
-            adjusted_message["thinking"] = msg.at("reasoning_content");
-            adjusted_message.erase("content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
+    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // MiroThinker Thinking format:
+        // - Reasoning: <think>{reasoning}</think>
+        // - Content: text after reasoning
+        // - Tool calls section:
+        //   <use_mcp_tool>
+        //   <server_name>{server_name}</server_name>
+        //   <tool_name>{tool_name}</tool_name>
+        //   <arguments>
+        //   {json_args}
+        //   </arguments>
+        //   ...
+        //   </use_mcp_tool>
+
+        auto reasoning = extract_reasoning ? p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>") : p.eps();
+
+        // Tool call markers
+        const std::string SECTION_BEGIN = "<use_mcp_tool>";
+        const std::string SECTION_END = "</use_mcp_tool>";
+        const std::string CALL_BEGIN = "<server_name>";
+        const std::string ARGS_BEGIN = "<arguments>";
+        const std::string CALL_END = "</arguments>";
+
+        auto end = p.end();
+
+        // Content only parser (no tools)
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return reasoning + p.content(p.rest()) + end;
+        }
+
+        // Build tool call parsers for each available function
+        // Function name format is: <tool_name>{tool_name}</tool_name>
+        // We need to match: {what_ever}</server_name>{spaces}<tool_name>{tool_name}</tool_name>
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name = function.at("name");
+            const auto & schema = function.at("parameters");
+
+            // Match: {what_ever}</server_name>{spaces}<tool_name>{tool_name}</tool_name>
+            auto tool_parser = p.tool(
+                p.tool_open(
+                    p.until("</server_name>") +
+                    p.literal("</server_name>") +
+                    p.space() +
+                    p.literal("<tool_name>") +
+                    p.tool_name(p.literal(name)) +
+                    p.literal(ARGS_BEGIN)
+                ) + p.space() +
+                p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)) +
+                p.space() + p.tool_close(p.literal(CALL_END))
+            );
 
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
+            tool_choice |= p.rule("tool-" + name, tool_parser);
+            });
 
-    // Check if we need to replace the return token with end token during
-    // inference and without generation prompt. For more details see:
-    // https://github.com/ggml-org/llama.cpp/issues/15417
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|return|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
+        // Tool calls section: <use_mcp_tool> tool_calls </use_mcp_tool>
+        auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+        auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
+        auto tool_calls = p.trigger_rule("tool-calls",
+            p.literal(SECTION_BEGIN) + p.space() +
+            p.rule("tool-call", p.repeat(CALL_BEGIN + tool_choice, min_calls, max_calls) +
+                p.space() + p.literal(SECTION_END))
+        );
 
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GPT_OSS;
+        auto content_before_tools = p.content(p.until(SECTION_BEGIN));
 
-    // These special tokens are required to parse properly, so we include them
-    // even if parse_tool_calls is false.
-    data.preserved_tokens = {
-        "<|channel|>",
-        "<|constrain|>",
-        "<|message|>",
-        "<|start|>",
-        "<|end|>",
-    };
-
-    if (!inputs.json_schema.is_null()) {
-        data.grammar_lazy = false;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schema = inputs.json_schema;
-            builder.resolve_refs(schema);
-
-            auto not_end = builder.add_rule("not-end",
-                "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-            auto analysis = builder.add_rule("analysis",
-                "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-            auto constraint = builder.add_rule("constraint", "\"<|constrain|>\"? [a-zA-Z0-9_-]+");
-            auto final = builder.add_rule("final",
-                "\"<|channel|>final\" ( \" \" " + constraint + " )? \"<|message|>\" " +
-                builder.add_schema("response", schema)
-            );
-
-            builder.add_rule("root", "( " + analysis + " \"<|start|>assistant\" )? " + final);
+        return reasoning + content_before_tools + tool_calls + end;
         });
-    }
 
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            // tool calls can appear in commentary or analysis channels
-            auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
+    data.parser = parser.save();
 
-            std::vector<std::string> tool_rules_recipient_in_role;
-            std::vector<std::string> tool_rules_recipient_in_channel;
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                tool_rules_recipient_in_role.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-
-                tool_rules_recipient_in_channel.push_back(
-                    builder.add_rule(name + "-call",
-                        "\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
-                        builder.add_schema(name + "-args", parameters)
-                    )
-                );
-            });
-
-            auto recipient_in_channel = builder.add_rule("recipient_in_channel",
-                channel + " \" to=functions.\" ( " +
-                string_join(tool_rules_recipient_in_channel, " | ") + " )"
-            );
-
-            if (data.grammar_lazy) {
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
-                    string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
-            } else {
-                auto not_end = builder.add_rule("not-end",
-                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
-                auto analysis = builder.add_rule("analysis",
-                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-                auto commentary = builder.add_rule("commentary",
-                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
-
-                auto recipient_in_role = builder.add_rule("recipient_in_role",
-                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
-                );
-
-                builder.add_rule("root",
-                    "( " + analysis + " \"<|start|>assistant\" )? " +
-                    "( " + commentary + " \"<|start|>assistant\" )? " +
-                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
-                );
-            }
-
-            // Trigger on tool calls that appear in the commentary channel
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|channel\\|>(?:commentary|analysis) to"
-            });
-
-            // Trigger tool calls that appear in the role section, either at the
-            // start or in the middle.
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                "^ to"
+                auto         schema = function.at("parameters");
+                builder.resolve_refs(schema);
+                });
+            parser.build_grammar(builder, data.grammar_lazy);
             });
 
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                "<\\|start\\|>assistant to"
-            });
-        });
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<use_mcp_tool>" }
+        };
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_glm_4_5(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// LFM2 format:
+// - Reasoning: <think>{reasoning}</think> (optional, only if enable_thinking is true)
+// - Content: text after reasoning (optional)
+// - Tool calls: <|tool_call_start|>[function_name(arg1="value1", arg2="value2")]<|tool_call_end|>
+// Tool calls can appear multiple times (parallel tool calls)
+// LFM2 format: uses <|tool_list_start|>[...]<|tool_list_end|> in system prompt
+// and <|tool_call_start|>[name(arg="val")]<|tool_call_end|> for tool calls.
+// - Reasoning: <think>{reasoning}</think> (optional)
+// - Content: text before a tool call (optional)
+// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
+//   Tool calls can appear multiple times (parallel tool calls supported)
+static common_chat_params common_chat_params_init_lfm2(const common_chat_template &    tmpl,
+                                                       const autoparser::generation_params & inputs) {
     common_chat_params data;
-    data.grammar_lazy = inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-
-    std::string prompt = apply(tmpl, inputs);
-
-    // match the existing trimming behavior
-    if (inputs.add_bos && string_starts_with(prompt, tmpl.bos_token())) {
-        prompt.erase(0, tmpl.bos_token().size());
-    }
-    if (inputs.add_eos && string_ends_with(prompt, tmpl.eos_token())) {
-        prompt.erase(prompt.size() - tmpl.eos_token().size());
-    }
-    if (string_ends_with(prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
 
-    // add GLM preserved tokens
-    data.preserved_tokens = {
-        "<|endoftext|>",
-        "[MASK]",
-        "[gMASK]",
-        "[sMASK]",
-        "<sop>",
-        "<eop>",
-        "<|system|>",
-        "<|user|>",
-        "<|assistant|>",
-        "<|observation|>",
-        "<|begin_of_image|>",
-        "<|end_of_image|>",
-        "<|begin_of_video|>",
-        "<|end_of_video|>",
-        "<|begin_of_audio|>",
-        "<|end_of_audio|>",
-        "<|begin_of_transcription|>",
-        "<|end_of_transcription|>",
-        "<|code_prefix|>",
-        "<|code_middle|>",
-        "<|code_suffix|>",
-        "/nothink",
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.preserved_tokens  = {
+        "<|tool_list_start|>",
+        "<|tool_list_end|>",
+        "<|tool_call_start|>",
+        "<|tool_call_end|>",
         "<think>",
         "</think>",
-        "<tool_call>",
-        "</tool_call>",
-        "<arg_key>",
-        "</arg_key>",
-        "<arg_value>",
-        "</arg_value>"
     };
 
-    // extra GLM 4.5 stop word
-    data.additional_stops.insert(data.additional_stops.end(), {
-        "<|user|>",
-        "<|observation|>"
-    });
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
-    // build grammar for tool call
-    static const xml_tool_call_format form {
-        /* form.scope_start = */ "",
-        /* form.tool_start  = */ "\n<tool_call>",
-        /* form.tool_sep    = */ "\n",
-        /* form.key_start   = */ "<arg_key>",
-        /* form.key_val_sep = */ "</arg_key>\n<arg_value>",
-        /* form.val_end     = */ "</arg_value>\n",
-        /* form.tool_end    = */ "</tool_call>\n",
-        /* form.scope_end   = */ "",
-    };
-    build_grammar_xml_tool_call(data, inputs.tools, form);
+    const std::string TOOL_CALL_START = "<|tool_call_start|>";
+    const std::string TOOL_CALL_END   = "<|tool_call_end|>";
+    const std::string THINK_START     = "<think>";
+    const std::string THINK_END       = "</think>";
 
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_GLM_4_5;
-    return data;
-}
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag   = THINK_END;
 
-static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    LOG_DBG("%s\n", __func__);
-    common_chat_params data;
-    const std::optional<json> additional_context = json {
-        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
-        {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
-    };
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override =*/ std::nullopt, additional_context);
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            auto schemas = json::array();
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                schemas.push_back({
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {
-                            {"type", "string"},
-                            {"const", function.at("name")},
-                        }},
-                        {"arguments", function.at("parameters")},
-                    }},
-                    {"required", json::array({"name", "arguments", "id"})},
-                });
-            });
-            auto schema = json {
-                {"type", "array"},
-                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
-                {"minItems", 1},
-            };
-            if (!inputs.parallel_tool_calls) {
-                schema["maxItems"] = 1;
-            }
-            builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
-        });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, " functools["});
-        data.preserved_tokens = {
-            " functools[",
-        };
-        data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
-    return data;
-}
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+        auto end = p.end();
 
-static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
-    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
-    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
-    if (inputs.tools.is_array() && !inputs.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> first_tool_rules;
-            std::vector<std::string> subsequent_tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                std::string args_pattern = "[\\s\\S]*";
-                auto args_rule = builder.add_schema(name + "-args", parameters);
-                if (name == "python") {
-                    args_rule = builder.add_rule(name + "-maybe-raw-args", args_rule + " | [^{] .*");
-                } else {
-                    args_pattern = "\\{" + args_pattern;
-                }
-                auto call_rule = builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule);
-                first_tool_rules.push_back(call_rule);
-                if (inputs.parallel_tool_calls) {
-                    subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>\" " + call_rule));
-                }
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
-                    "((?:[\\s\\S]+?>>>)?" + regex_escape(name) + "\n)" + args_pattern,
-                });
-            });
-            data.preserved_tokens = {
-                "<|end_header_id|>",
-            };
-            auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
-            if (inputs.parallel_tool_calls) {
-                auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
-                builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
-            } else {
-                builder.add_rule("root", first_rule);
-            }
+        auto reasoning = p.eps();
+        if (extract_reasoning && inputs.enable_thinking) {
+            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
+        }
 
-        });
-    }
-    return data;
-}
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
+        }
+        auto tool_calls = p.rule("tool-calls",
+            p.trigger_rule("tool-call",
+                p.literal(TOOL_CALL_START) +
+                p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls) +
+                p.literal(TOOL_CALL_END)
+            )
+        );
 
-static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
-    common_chat_params data;
+        auto content = p.content(p.until(TOOL_CALL_START));
+
+        return generation_prompt + reasoning + content + tool_calls + end;
+    });
 
-    if (!inputs.tools.is_null()) {
-        std::string python_code_argument_name;
-        auto has_raw_python = false;
+    data.parser = parser.save();
 
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
             foreach_function(inputs.tools, [&](const json & tool) {
                 const auto & function = tool.at("function");
-                const auto & parameters = function.at("parameters");
-                std::string name = function.at("name");
-                if (name == "python" || name == "ipython") {
-                    if (!parameters.contains("type")) {
-                        throw std::runtime_error("Missing type in python tool");
-                    }
-                    has_raw_python = true;
-                    const auto & type = parameters.at("type");
-                    if (type == "object") {
-                        auto properties = parameters.at("properties");
-                        for (auto it = properties.begin(); it != properties.end(); ++it) {
-                            if (it.value().at("type") == "string") {
-                                if (!python_code_argument_name.empty()) {
-                                    throw std::runtime_error("Multiple string arguments found in python tool");
-                                }
-                                python_code_argument_name = it.key();
-                            }
-                        }
-                        if (python_code_argument_name.empty()) {
-                            throw std::runtime_error("No string argument found in python tool");
-                        }
-                    } else if (type != "string") {
-                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
-                    }
-                }
-                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
             });
-            if (has_raw_python) {
-                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
-                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-                data.preserved_tokens.push_back("<|python_tag|>");
-            }
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
-            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
-            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
+            parser.build_grammar(builder, data.grammar_lazy);
         });
-        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
-    } else {
-        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    }
 
-    data.prompt = apply(tmpl, inputs);
-    // TODO: if (has_raw_python)
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, TOOL_CALL_START }
+        };
+    }
     return data;
 }
 
-static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
+// LFM2.5 format: uses plain "List of tools: [...]" in system prompt, no wrapper tokens.
+// Tool calls are bare [name(arg="val")], though model may optionally emit <|tool_call_start|>.
+// - Reasoning: <think>{reasoning}</think> (optional)
+// - Content: text before a tool call (optional)
+// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
+//   Tool calls can appear multiple times (parallel tool calls supported)
+static common_chat_params common_chat_params_init_lfm2_5(const common_chat_template &    tmpl,
+                                                         const autoparser::generation_params & inputs) {
     common_chat_params data;
 
-    json extra_context = json {
-        {"enable_thinking", inputs.enable_thinking},
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.preserved_tokens  = {
+        "<|tool_call_start|>",
+        "<|tool_call_end|>",
+        "<think>",
+        "</think>",
     };
-    extra_context.update(inputs.extra_context);
 
-    data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context);
-    data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
-    if (string_ends_with(data.prompt, "<think>\n")) {
-        if (!extra_context["enable_thinking"]) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
-        }
-    }
-
-    if (!inputs.tools.is_null()) {
-        // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            std::vector<std::string> tool_call_alts;
-            std::vector<std::string> escaped_names;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_schema(name + "-call", {
-                    {"type", "object"},
-                    {"properties", json {
-                        {"name", json {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                }));
-                tool_call_alts.push_back(builder.add_rule(
-                    name + "-function-tag",
-                    "\"<function\" ( \"=" + name + "\" | \" name=\\\"" + name + "\\\"\" ) \">\" space " +
-                    builder.add_schema(name + "-args", parameters) + " "
-                    "\"</function>\" space"));
-
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                    "<function=" + name + ">",
-                });
-                auto escaped_name = regex_escape(name);
-                data.grammar_triggers.push_back({
-                    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                    "<function\\s+name\\s*=\\s*\"" + escaped_name + "\"",
-                });
-                escaped_names.push_back(escaped_name);
-            });
-            auto any_tool_call = builder.add_rule("any_tool_call", "( " + string_join(tool_rules, " | ") + " ) space");
-            std::vector<std::string> alt_tags {
-                any_tool_call,
-                "\"<tool_call>\" space "     + any_tool_call + " \"</tool_call>\"",
-                // The rest is just to accommodate common "good bad" outputs.
-                "\"<function_call>\" space " + any_tool_call + " \"</function_call>\"",
-                "\"<response>\"  space "     + any_tool_call + " \"</response>\"",
-                "\"<tools>\"     space "     + any_tool_call + " \"</tools>\"",
-                "\"<json>\"      space "     + any_tool_call + " \"</json>\"",
-                "\"<xml>\"      space "     + any_tool_call + " \"</xml>\"",
-                "\"<JSON>\"      space "     + any_tool_call + " \"</JSON>\"",
-            };
-            auto wrappable_tool_call = builder.add_rule("wrappable_tool_call", "( " + string_join(alt_tags, " | ") + " ) space");
-            tool_call_alts.push_back(wrappable_tool_call);
-            tool_call_alts.push_back(
-                "( \"```\\n\" | \"```json\\n\" | \"```xml\\n\" ) space " + wrappable_tool_call + " space \"```\" space ");
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_call_alts, " | "));
-            builder.add_rule("root",
-                std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
-                (inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call));
-            // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-                // If thinking_forced_open, then we capture the </think> tag in the grammar,
-                // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
-                std::string(data.thinking_forced_open ? "(</think>\\s*)" : "") + (
-                    "\\s*("
-                    "(?:<tool_call>"
-                    "|<function"
-                    "|(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
-                    "\\s*\\{\\s*\"name\"\\s*:\\s*\"(?:" + string_join(escaped_names, "|") + ")\""
-                    ")"
-                    ")"
-                ),
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<tool_call>",
-                "</tool_call>",
-                "<function",
-                "<tools>",
-                "</tools>",
-                "<response>",
-                "</response>",
-                "<function_call>",
-                "</function_call>",
-                "<json>",
-                "</json>",
-                "<JSON>",
-                "</JSON>",
-                "```",
-                "```json",
-                "```xml",
-            };
-        });
-    }
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
 
-    return data;
-}
+    const std::string THINK_START     = "<think>";
+    const std::string THINK_END       = "</think>";
 
-static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
+    data.thinking_start_tag = THINK_START;
+    data.thinking_end_tag   = THINK_END;
 
-    // Pass thinking context for Granite template
-    json additional_context = {
-        {"thinking", inputs.enable_thinking},
-    };
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+        auto end = p.end();
 
-    data.prompt = apply(tmpl, inputs, /* messages_override= */ std::nullopt, /* tools_override= */ std::nullopt, additional_context);
-    data.format = COMMON_CHAT_FORMAT_GRANITE;
+        auto reasoning = p.eps();
+        if (extract_reasoning && inputs.enable_thinking) {
+            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
+        }
 
-    if (string_ends_with(data.prompt, "<think>\n") || string_ends_with(data.prompt, "<think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
         }
-    }
 
-    if (!inputs.tools.is_null()) {
-        // Granite uses <|tool_call|> followed by JSON list
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(inputs.tools, [&](const json & tool) {
-                const auto & function = tool.at("function");
-                std::string name = function.at("name");
-                auto parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-                tool_rules.push_back(builder.add_rule(name + "-call", builder.add_schema(name +
-"-args", {
-                    {"type", "object"},
-                    {"properties", {
-                        {"name", {{"const", name}}},
-                        {"arguments", parameters},
-                    }},
-                    {"required", json::array({"name", "arguments"})},
-                })));
-            });
+        auto tool_calls = p.rule("tool-calls",
+            p.trigger_rule("tool-call",
+                p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls)
+            )
+        );
 
-            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | "));
-            auto tool_list = builder.add_rule("tool_list", "\"[\" space " + tool_call + " (\",\" space " + tool_call + ")* space \"]\"");
+        auto content = p.content(p.until_one_of({"<|tool_call_start|>", "["}));
+        auto maybe_start = p.optional(p.literal("<|tool_call_start|>"));
+        return generation_prompt + reasoning + content + maybe_start + tool_calls + end;
+    });
 
-            if (data.thinking_forced_open) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space [^<]* \"</response>\" space \"<|tool_call|>\" space " + tool_list);
-            } else {
-                builder.add_rule("root", "\"<|tool_call|>\" space " + tool_list);
-            }
+    data.parser = parser.save();
 
-            data.grammar_triggers.push_back({
-                COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
-                "<|tool_call|>"
+    if (include_grammar) {
+        data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.at("parameters");
+                builder.resolve_refs(schema);
             });
-
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-                "<|tool_call|>",
-            };
+            parser.build_grammar(builder, data.grammar_lazy);
+        });
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const std::string name = tool.at("function").at("name");
+            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[" + name + "(" });
         });
-    } else {
-        // Handle thinking tags for non-tool responses
-        if (data.thinking_forced_open && inputs.enable_thinking) {
-            data.grammar_lazy = false;
-            data.grammar = build_grammar([&](const common_grammar_builder & builder) {
-                builder.add_rule("root", "\"</think>\" space \"<response>\" space .* \"</response>\" space");
-            });
-            data.preserved_tokens = {
-                "<think>",
-                "</think>",
-                "<response>",
-                "</response>",
-            };
-        }
     }
 
     return data;
 }
 
-static common_chat_params common_chat_params_init_solar_open(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-
-    // Copy `reasoning_content` to `reasoning`
-    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
-        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
-            auto adjusted_message = msg;
-            adjusted_message["reasoning"] = msg.at("reasoning_content");
-            adjusted_message.erase("reasoning_content");
-            adjusted_messages.push_back(adjusted_message);
-        } else {
-            adjusted_messages.push_back(msg);
-        }
-    }
-
-    auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
-    auto include_grammar = true;
-
-    auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
-
-    // Check if we need to replace the flush token with end token during inference and without generation prompt.
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|flush|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
-    data.prompt = prompt;
-    data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
-    data.preserved_tokens = {
-        "<|think|>",
-        "<|content|>",
-        "<|begin|>",
-        "<|end|>",
-        "<|tool_calls|>",
-        "<|tool_call:begin|>",
-        "<|tool_call:end|>",
-        "<|tool_call:name|>",
-        "<|tool_call:args|>",
-    };
+static common_chat_params common_chat_params_init_gigachat_v3(
+        const common_chat_template & tmpl,
+        const autoparser::generation_params & inputs) {
 
-    auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
-        auto lit_think = p.atomic(p.literal("<|think|>"));
-        auto lit_assistant_begin = p.atomic(p.literal("<|begin|>assistant"));
-        auto lit_content = p.atomic(p.literal("<|content|>"));
-        auto lit_end = p.atomic(p.literal("<|end|>"));
-        auto parser_until_end = p.until("<|end|>");
-
-        // reasoning <- "<|think|>" (!"<|end|>" .)*
-        auto parser_reasoning = p.rule("reasoning", lit_think + p.reasoning(parser_until_end));
-
-        // content <- "<|content|>" (!"<|end|>" .)*
-        auto parser_content = p.rule("content", lit_content + p.content(parser_until_end));
-
-        // wrap_choice(items) <- item-choice wrapped*
-        // item-choice        <- items[0] / ... / items[n]
-        // wrapped            <- "<|end|><|begin|>assistant" item-choice
-        auto wrap_choice = [&](const std::vector<common_peg_parser> & items) {
-            auto choice = p.choice(items);
-            return choice + p.zero_or_more(lit_end + lit_assistant_begin + choice);
-        };
-
-        // wrap_seq(items) <- item[0] "<|end|><|begin|>assistant" item[1] ...
-        auto wrap_seq = [&](const std::vector<common_peg_parser> & items) {
-            auto seq = p.sequence();
-            for (auto i = 0u; i < items.size(); i++) {
-                if (i == 0) {
-                    seq += items[i];
-                    continue;
-                }
-                seq += lit_end + lit_assistant_begin + items[i];
-            }
-            return seq;
-        };
-
-        // Response format parser
-        if (inputs.json_schema.is_object() && !inputs.json_schema.empty()) {
-            auto parser_response_format = lit_content + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
-            return p.choice({
-                wrap_seq({parser_reasoning, parser_response_format}),
-                wrap_seq({parser_response_format})
-            });
-        }
+    common_chat_params data;
 
-        auto lit_tool_call_begin = p.literal("<|tool_call:begin|>");
-        auto lit_tool_call_name = p.literal("<|tool_call:name|>");
-        auto lit_tool_call_args = p.literal("<|tool_call:args|>");
-        auto lit_tool_call_end = p.literal("<|tool_call:end|>");
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = false;
+    data.preserved_tokens  = {
+        "<|message_sep|>\n\n",
+        "<|role_sep|>\n",
+    };
 
-        // Tool call parser
+    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
+    const auto *tool_call_start_prefix = "<|message_sep|>\n\nfunction call<|role_sep|>\n";
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto ret = p.eps();
         if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
-            auto parser_tool_call = p.choice();
-            foreach_function(inputs.tools, [&](const json & tool) {
+            // Build a choice of all available tools
+            auto tool_choice = p.choice();
+            for (const auto & tool : inputs.tools) {
                 const auto & function = tool.at("function");
                 std::string name = function.at("name");
                 const auto & schema = function.at("parameters");
 
-                // tool(name, schema) <- name "<|tool_call:args|>" schema
-                parser_tool_call |= p.rule("tool-" + name,
-                    p.atomic(p.tool_name(p.literal(name)) + lit_tool_call_args)
-                    + p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
-            });
+                auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
+                auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
 
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
-            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-
-            // tool-calls  <- "<|tool_calls|>" tool-call+
-            // tool-call   <- "<|tool_call:begin|> call-id "<|tool_call:name|>" &([^<]+ "<|tool_call:args|>") tool-choice "<|tool_call:end|>"
-            // call-id     <- [a-zA-Z0-9_-]+
-            // tool-choice <- tool(t[0].name, t[0].schema) / ... / tool(t[n].name, t[n].schema)
-            auto parser_tool_calls = p.trigger_rule("tool-calls",
-                p.atomic(p.literal("<|tool_calls|>"))
-                + p.repeat(
-                    p.tool_open(
-                        lit_tool_call_begin
-                        + p.tool_id(p.chars("[a-zA-Z0-9_-]", 1, -1))
-                        + lit_tool_call_name
-                        + p.peek(p.chars("[^<]", 1, -1) + lit_tool_call_args))
-                    + parser_tool_call
-                    + p.tool_close(lit_tool_call_end),
-                /* min = */ 1,
-                /* max = */ max_calls));
-
-            if (min_calls == 1) {
-                // If required, then try any combination of the reasoning, content, and tool call
-                return p.choice({
-                    wrap_seq({parser_reasoning, parser_content, parser_tool_calls}),
-                    wrap_seq({parser_reasoning, parser_tool_calls}),
-                    wrap_seq({parser_content, parser_tool_calls}),
-                    wrap_seq({parser_tool_calls})
-                });
+                auto tool_open = p.tool_open(p.literal("{") << tool_name);
+
+                tool_choice |= p.rule("tool-" + name, tool_open << "," << tool_args << "}");
             }
 
-            return wrap_choice({parser_reasoning, parser_content, parser_tool_calls});
+            // Define the tool call structure
+            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto max_calls = 1; // parallel toolcalls are not supported
+            auto tool_call = p.rule("tool-call", p.literal(tool_call_start_prefix) + tool_choice);
+            auto tool_calls = p.trigger_rule("tool-call-root", p.repeat(tool_call, /* min = */ min_calls, /* max = */ max_calls));
+
+            ret = p.content(p.until("<|message_sep|>\n\n")) << tool_calls;
+        } else {
+            // Content only parser
+            include_grammar = false;
+            ret = p.content(p.rest());
         }
 
-        // Content only parser
-        include_grammar = false;
-        return wrap_choice({parser_reasoning, parser_content});
+        return p.literal(inputs.generation_prompt) + ret;
     });
 
     data.parser = parser.save();
@@ -2853,133 +1791,192 @@ static common_chat_params common_chat_params_init_solar_open(const common_chat_t
         });
 
         data.grammar_triggers = {
-            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|tool_calls|>"}
+            {COMMON_GRAMMAR_TRIGGER_TYPE_WORD, tool_call_start_prefix}
         };
     }
-
     return data;
 }
 
-
-static common_chat_params common_chat_params_init_translate_gemma(const common_chat_template & tmpl, const struct templates_params & inputs) {
+static common_chat_params common_chat_params_init_deepseek_v3_2(const common_chat_template &    tmpl,
+                                                                 const autoparser::generation_params & inputs) {
     common_chat_params data;
 
-    // This template does not support tools or reasoning
-    // we just need to transform the messages into the correct schema
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
+    data.supports_thinking = true;
+    data.thinking_start_tag = "<think>";
+    data.thinking_end_tag   = "</think>";
+    data.preserved_tokens  = {
+        "｜DSML｜",
+        "<think>",
+        "</think>",
+    };
 
-    templates_params inputs_new = inputs;
-    json & messages = inputs_new.messages;
+    auto has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
+    auto extract_reasoning   = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto include_grammar     = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
+
+    const std::string DSML         = "｜DSML｜";
+    const std::string THINK_START  = "<think>";
+    const std::string THINK_END    = "</think>";
+    const std::string FC_START     = "<" + DSML + "function_calls>";
+    const std::string FC_END       = "</" + DSML + "function_calls>";
+    const std::string INVOKE_START = "<" + DSML + "invoke";
+    const std::string INVOKE_END   = "</" + DSML + "invoke>";
+    const std::string PARAM_START  = "<" + DSML + "parameter";
+    const std::string PARAM_END    = "</" + DSML + "parameter>";
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
+        auto end = p.end();
 
-    // default to chat_template_kwargs, or en-GB if not specified
-    std::string default_src_lang = inputs.extra_context.value("source_lang_code", "en-GB");
-    std::string default_tgt_lang = inputs.extra_context.value("target_lang_code", "en-GB");
+        auto reasoning = p.eps();
+        if (extract_reasoning && inputs.enable_thinking) {
+            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
+        } else if (extract_reasoning) {
+            // Thinking disabled but reasoning extraction requested: the generation prompt
+            // contains an empty <think></think> pair that must still be consumed.
+            reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END));
+        }
 
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("role") && message["role"].get<std::string>() != "user") {
-            continue;
+        if (has_response_format) {
+            auto response_format = p.rule("response-format",
+                p.literal("```json") + p.space() +
+                p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)) +
+                p.space() + p.literal("```"));
+            return generation_prompt + reasoning + response_format + end;
         }
-        if (!message.contains("content")) {
-            message["content"] = json::array();
-        }
-        if (message.contains("content") && !message["content"].is_array()) {
-            auto content_str = message["content"].get<std::string>();
-            // default to en-GB if not specified (to make common_chat_format_example works)
-            auto src_lang = message.contains("source_lang_code")
-                        ? message["source_lang_code"].get<std::string>() : default_src_lang;
-            auto tgt_lang = message.contains("target_lang_code")
-                        ? message["target_lang_code"].get<std::string>() : default_tgt_lang;
-            message["content"] = json::array({
-                json{
-                    {"type", "text"},
-                    {"text", content_str},
-                    {"source_lang_code", src_lang},
-                    {"target_lang_code", tgt_lang},
-                }
-            });
+
+        if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+            return generation_prompt + reasoning + p.content(p.rest()) + end;
         }
-    }
 
-    data.prompt = apply(tmpl, inputs_new, std::nullopt, std::nullopt);
-    data.format = COMMON_CHAT_FORMAT_GENERIC;
+        auto tool_choice = p.choice();
+        foreach_function(inputs.tools, [&](const json & tool) {
+            const auto & function = tool.at("function");
+            std::string  name     = function.at("name");
+            auto params   = function.contains("parameters") ? function.at("parameters") : json::object();
+            const auto & props    = params.contains("properties") ? params.at("properties") : json::object();
+
+            std::set<std::string> required;
+            if (params.contains("required")) {
+                params.at("required").get_to(required);
+            }
 
-    return data;
-}
+            auto schema_info = common_schema_info();
+            schema_info.resolve_refs(params);
+
+            std::vector<common_peg_parser> required_parsers;
+            std::vector<common_peg_parser> optional_parsers;
+            for (const auto & [param_name, param_schema] : props.items()) {
+                bool is_required = required.find(param_name) != required.end();
+                bool is_string   = schema_info.resolves_to_string(param_schema);
+
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(
+                        p.literal(PARAM_START + " name=\"") +
+                        p.tool_arg_name(p.literal(param_name)) +
+                        p.literal("\" string=\"" + std::string(is_string ? "true" : "false") + "\">")) +
+                    (is_string
+                         ? p.tool_arg_string_value(p.until(PARAM_END))
+                         : p.tool_arg_json_value(p.schema(p.json(),
+                                                          "tool-" + name + "-arg-" + param_name + "-schema",
+                                                          param_schema, false))) +
+                    p.tool_arg_close(p.literal(PARAM_END)));
+
+                auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
+                if (is_required) {
+                    required_parsers.push_back(named_arg);
+                } else {
+                    optional_parsers.push_back(named_arg);
+                }
+            }
 
-static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
-    common_chat_params data;
-    data.prompt = apply(tmpl, inputs);
-    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    data.grammar_lazy = false;
-    if (!inputs.json_schema.is_null()) {
-        if (!inputs.grammar.empty()) {
-            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
-        }
-        data.grammar = json_schema_to_grammar(inputs.json_schema);
-    } else {
-        data.grammar = inputs.grammar;
-    }
-    return data;
-}
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < required_parsers.size(); i++) {
+                if (i > 0) {
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + required_parsers[i];
+            }
 
-static common_chat_params common_chat_params_init_seed_oss(
-    const common_chat_template         & tmpl,
-    templates_params                   & params,
-    const common_chat_templates_inputs & inputs)
-{
-    common_chat_params data;
-    data.prompt = apply(tmpl, params);
-    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
-    if (string_ends_with(data.prompt, "<seed:think>")) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</seed:think>";
+            if (!optional_parsers.empty()) {
+                common_peg_parser any_opt = p.choice();
+                for (const auto & opt : optional_parsers) {
+                    any_opt |= opt;
+                }
+                args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
+            }
+
+            common_peg_parser invoke_body = args_seq;
+            auto func_parser = p.tool(
+                p.tool_open(p.literal(INVOKE_START + " name=\"") +
+                            p.tool_name(p.literal(name)) + p.literal("\">\n")) +
+                invoke_body + p.space() +
+                p.tool_close(p.literal(INVOKE_END)));
+
+            tool_choice |= p.rule("tool-" + name, func_parser);
+        });
+
+        auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+
+        common_peg_parser tool_calls = p.eps();
+        if (inputs.parallel_tool_calls) {
+            tool_calls = p.trigger_rule("tool-call",
+                p.literal(FC_START) + p.space() + tool_choice +
+                p.zero_or_more(p.space() + tool_choice) + p.space() + p.literal(FC_END));
         } else {
-            data.thinking_forced_open = true;
+            tool_calls = p.trigger_rule("tool-call",
+                p.literal(FC_START) + p.space() + tool_choice + p.space() + p.literal(FC_END));
         }
-    }
 
-    if (params.tools.is_array() && !params.tools.empty()) {
-        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
-            std::vector<std::string> tool_rules;
-            foreach_function(params.tools, [&](const json & tool) {
-                const auto & function   = tool.at("function");
-                std::string  name       = function.at("name");
-                auto         parameters = function.at("parameters");
-                builder.resolve_refs(parameters);
-
-                // Create rule for Seed-OSS function call format
-                std::string param_rules;
-                if (parameters.contains("properties")) {
-                    for (const auto & [key, value] : parameters.at("properties").items()) {
-                        param_rules += "\"<parameter=" + key + ">\"" + builder.add_schema(name + "-arg-" + key, value) +
-                                       "\"</parameter>\"";
-                    }
-                }
-
-                tool_rules.push_back(builder.add_rule(name + "-call",
-                                                      "\"<seed:tool_call>\" space \"<function=" + name + ">\" space " +
-                                                          param_rules +
-                                                          " \"</function>\" space \"</seed:tool_call>\""));
-            });
+        if (!require_tools) {
+            tool_calls = p.optional(tool_calls);
+        }
 
-            data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<seed:tool_call>" });
+        auto content_before_tools = p.content(p.until(FC_START));
+        return generation_prompt + reasoning + content_before_tools + tool_calls + end;
+    });
 
-            data.preserved_tokens = {
-                "<seed:think>", "</seed:think>", "<seed:tool_call>", "</seed:tool_call>",
-                "<function=",   "</function>",   "<parameter=",      "</parameter>",
-            };
+    data.parser = parser.save();
 
-            builder.add_rule("root", string_join(tool_rules, " | "));
+    if (include_grammar) {
+        data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
+        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                auto         schema   = function.contains("parameters") ? function.at("parameters") : json::object();
+                builder.resolve_refs(schema);
+            });
+            if (has_response_format) {
+                auto schema = inputs.json_schema;
+                builder.resolve_refs(schema);
+            }
+            parser.build_grammar(builder, data.grammar_lazy);
         });
+
+        data.grammar_triggers = {
+            { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, FC_START },
+        };
     }
+
     return data;
 }
 
-// various workarounds for known issues with certain templates or model behaviors
-// TODO @ngxson : improve this (how?)
 namespace workaround {
 
+static void map_developer_role_to_system(json & messages) {
+    for (auto & message : messages) {
+        if (message.contains("role")) {
+            if (message["role"] == "developer") {
+                message["role"] = "system";
+            }
+        }
+    }
+}
+
+
 // if first message is system and template does not support it, merge it with next message
 static void system_message_not_supported(json & messages) {
     if (!messages.empty() && messages.front().at("role") == "system") {
@@ -2997,359 +1994,395 @@ static void system_message_not_supported(json & messages) {
     }
 }
 
-static void func_args_not_string(json & messages) {
+static void requires_non_null_content(json & messages) {
     GGML_ASSERT(messages.is_array());
     for (auto & message : messages) {
-        if (message.contains("tool_calls")) {
-            for (auto & tool_call : message["tool_calls"]) {
-                if (tool_call.contains("function") && tool_call["function"].contains("arguments")) {
-                    auto & args = tool_call["function"]["arguments"];
-                    if (args.is_string()) {
-                        try {
-                            args = json::parse(args.get<std::string>());
-                        } catch (const std::exception & e) {
-                            throw std::runtime_error("Failed to parse tool call arguments as JSON: " + std::string(e.what()));
-                        }
-                    }
-                }
-            }
+        if (message.contains("tool_calls") && !message.contains("content")) {
+            message["content"] = "";
         }
     }
 }
 
-static void move_tool_calls_to_content(json & messages, int indent_spaces = 2) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls")) {
-            auto tool_calls_new = json{
-                {"tool_calls", message.at("tool_calls")}
-            };
-            message.erase("tool_calls");
-            auto content = message.at("content");
-            std::string content_new = content.is_null() ? "" : content.get<std::string>();
-            message["content"] = content_new + tool_calls_new.dump(indent_spaces, ' ', false, json::error_handler_t::replace);
+// Gemma4 uses a custom tool_responses field instead of role:tool messages.
+//
+// This will transform a sequence of messages:
+//   assistant(tool_call+) -> tool+ -> assistant(content)
+//
+// Into a single assistant message containing a tool_responses field:
+//   assistant(content + tool_call + tool_responses)
+//
+// This is necessary for the Gemma4 chat template to properly format the prompt.
+// See https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
+struct gemma4_model_turn_builder {
+    json & messages;
+    size_t pos;
+    json tool_calls = json::array();
+    json tool_responses = json::array();
+    json content;
+    json reasoning_content;
+
+    gemma4_model_turn_builder(json & msgs, size_t pos) : messages(msgs), pos(pos) {}
+
+    void collect() {
+        // Collect the first assistant message
+        auto & msg = messages[pos];
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
+            // According to the prompt formatting guide, we need to preserve reasoning_content
+            // between function calls. The current chat templates do not support this, but we will do it anyway.
+            reasoning_content = msg.at("reasoning_content");
         }
-    }
-}
+        for (auto & tc : msg.at("tool_calls")) {
+            tool_calls.push_back(tc);
+        }
+        pos++;
 
-// TODO @ngxson : we may remove support for generic schema in the future
-static void use_generic_schema(json & messages) {
-    GGML_ASSERT(messages.is_array());
-    for (auto & message : messages) {
-        if (message.contains("tool_calls") && message.at("tool_calls").is_array()) {
-            auto & tool_calls = message.at("tool_calls");
-            for (auto & tool_call : tool_calls) {
-                if (tool_call.contains("type") && tool_call.at("type") == "function" &&
-                    tool_call.contains("function") && tool_call.at("function").is_object()) {
-                    // Copy values before erasing to avoid use-after-free
-                    json name_value;
-                    json arguments_value;
-                    json id_value;
-                    const auto & function = tool_call.at("function");
-                    if (function.contains("name")) {
-                        name_value = function.at("name");
-                    }
-                    if (function.contains("arguments")) {
-                        arguments_value = function.at("arguments");
-                    }
-                    if (tool_call.contains("id")) {
-                        id_value = tool_call.at("id");
-                    }
-                    // Now safely erase and assign in the correct order
-                    tool_call.erase("type");
-                    tool_call.erase("function");
-                    tool_call.erase("id");
-                    // Reassign in desired order: name, arguments, id
-                    if (!name_value.is_null()) {
-                        tool_call["name"] = name_value;
-                    }
-                    if (!arguments_value.is_null()) {
-                        tool_call["arguments"] = arguments_value;
-                    }
-                    if (!id_value.is_null()) {
-                        tool_call["id"] = id_value;
-                    }
-                }
+        // Collect tool call results
+        while (pos < messages.size() && messages[pos].value("role", "") == "tool") {
+            collect_result(messages[pos]);
+            pos++;
+        }
+
+        // Check if the next assistant message is the final message
+        if (pos < messages.size() && messages[pos].value("role", "") == "assistant") {
+            auto & next = messages[pos];
+            if (!has_tool_calls(next) && has_content(next)) {
+                content = next.at("content");
+                pos++;
             }
         }
     }
-}
 
-} // namespace workaround
+    void collect_result(const json & curr) {
+        json response;
+        if (curr.contains("content")) {
+            const auto & content = curr.at("content");
+            if (content.is_string()) {
+                // Try to parse the content as JSON; fall back to raw string
+                try {
+                    response = json::parse(content.get<std::string>());
+                } catch (...) {
+                    response = content;
+                }
+            } else {
+                response = content;
+            }
+        }
 
-static common_chat_params common_chat_templates_apply_jinja(
-    const struct common_chat_templates        * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    templates_params params;
-    params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
-    const auto & tmpl = params.tools.is_array() && tmpls->template_tool_use
-        ? *tmpls->template_tool_use
-        : *tmpls->template_default;
-    const auto & src = tmpl.source();
-    const auto & caps = tmpl.original_caps();
-    params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
-    params.add_generation_prompt = inputs.add_generation_prompt;
-    params.tool_choice = inputs.tool_choice;
-    params.reasoning_format = inputs.reasoning_format;
-    params.enable_thinking = inputs.enable_thinking;
-    params.grammar = inputs.grammar;
-    params.now = inputs.now;
-    params.add_bos = tmpls->add_bos;
-    params.add_eos = tmpls->add_eos;
+        std::string name;
 
-    if (!tmpl.original_caps().supports_system_role) {
-        workaround::system_message_not_supported(params.messages);
-    }
+        // Match name with corresponding tool call
+        size_t idx = tool_responses.size();
+        if (idx < tool_calls.size()) {
+            auto & tc = tool_calls[idx];
+            if (tc.contains("function")) {
+                name = tc.at("function").value("name", "");
+            }
+        }
 
-    params.extra_context = json::object();
-    for (auto el : inputs.chat_template_kwargs) {
-        params.extra_context[el.first] = json::parse(el.second);
-    }
+        // Fallback to the tool call id
+        if (name.empty()) {
+            name = curr.value("tool_call_id", "");
+        }
 
-    if (!inputs.json_schema.empty()) {
-        params.json_schema = json::parse(inputs.json_schema);
+        tool_responses.push_back({{"name", name}, {"response", response}});
     }
 
-    if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
-        LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");
-        params.parallel_tool_calls = false;
-    } else {
-        params.parallel_tool_calls = inputs.parallel_tool_calls;
-    }
+    json build() {
+        collect();
 
-    if (params.tools.is_array()) {
-        if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
-            throw std::runtime_error("Cannot specify grammar with tools");
+        json msg = {
+            {"role", "assistant"},
+            {"tool_calls", tool_calls},
+        };
+        if (!tool_responses.empty()) {
+            msg["tool_responses"] = tool_responses;
         }
-        if (caps.supports_tool_calls && !caps.supports_tools) {
-            LOG_WRN("Template supports tool calls but does not natively describe tools. The fallback behaviour used may produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+        if (!content.is_null()) {
+            msg["content"] = content;
         }
+        if (!reasoning_content.is_null()) {
+            msg["reasoning_content"] = reasoning_content;
+        }
+        return msg;
     }
 
-    // DeepSeek V3.1: detect based on specific patterns in the template
-    if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
-        params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_v3_1(tmpl, params);
+    static bool has_content(const json & msg) {
+        if (!msg.contains("content") || msg.at("content").is_null()) {
+            return false;
+        }
+        const auto & content = msg.at("content");
+        if (content.is_string() && !content.get<std::string>().empty()) {
+            return true;
+        }
+        if (content.is_array() && !content.empty()) {
+            return true;
+        }
+        return false;
     }
 
-    // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
-    if (src.find("<｜tool▁calls▁begin｜>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_deepseek_r1(tmpl, params);
+    static bool has_tool_calls(const json & msg) {
+        return msg.contains("tool_calls") && msg.at("tool_calls").is_array() && !msg.at("tool_calls").empty();
     }
+};
 
-    // Command R7B: : use handler in all cases except json schema (thinking / tools).
-    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos && params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_command_r7b(tmpl, params);
-    }
+static void convert_tool_responses_gemma4(json & messages) {
+    json result = json::array();
+    size_t i = 0;
 
-    // Granite (IBM) - detects thinking / tools support
-    if (src.find("elif thinking") != std::string::npos && src.find("<|tool_call|>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        workaround::use_generic_schema(params.messages);
-        workaround::move_tool_calls_to_content(params.messages);
-        return common_chat_params_init_granite(tmpl, params);
-    }
+    while (i < messages.size()) {
+        auto & msg = messages[i];
 
-    // GLM 4.5: detect by <arg_key> and <arg_value> tags (check before Hermes since both use <tool_call>)
-    if (src.find("[gMASK]<sop>") != std::string::npos &&
-        src.find("<arg_key>") != std::string::npos &&
-        src.find("<arg_value>") != std::string::npos &&
-        params.json_schema.is_null()) {
-        workaround::func_args_not_string(params.messages);
-        if (!params.extra_context.contains("clear_thinking")) {
-            // by default, do not clear reasoning_content (added since GLM-4.7)
-            params.extra_context["clear_thinking"] = false;
-        }
-        return common_chat_params_init_glm_4_5(tmpl, params);
-    }
-
-    //// Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    //// Detect via XML markers: <tool_call>, <function=...>, and <parameter=...> blocks.
-    //// Also matches Step-3.5-Flash and Nemotron 3 Nano which use the same output format.
-    //if (src.find("<tool_call>") != std::string::npos &&
-    //    src.find("<function=") != std::string::npos &&
-    //    src.find("<parameter=") != std::string::npos) {
-    //    workaround::func_args_not_string(params.messages);
-    //    return common_chat_params_init_qwen3_coder(tmpl, params);
-    //}
-
-    // Qwen3-Coder XML format detection (must come before Hermes 2 Pro)
-    // Detect via explicit XML markers unique to Qwen3-Coder to avoid false positives in other templates.
-    // Require presence of <tool_call>, <function=...>, and <parameter=...> blocks.
-    if (src.find("<tool_call>") != std::string::npos &&
-        src.find("<function=") != std::string::npos &&
-        src.find("<parameter=") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        // Models with <think> support (Step-3.5-Flash, Nemotron 3 Nano) use the
-        // Nemotron v3 PEG parser for streaming and schema-aware parameter parsing.
-        if (inputs.use_peg) {
-            return common_chat_params_init_qwen3_coder(tmpl, params);
+        if (msg.value("role", "") != "assistant" || !msg.contains("tool_calls") ||
+            !msg.at("tool_calls").is_array() || msg.at("tool_calls").empty()) {
+            result.push_back(msg);
+            i++;
+            continue;
         }
-        return common_chat_params_init_qwen3_coder_xml(tmpl, params);
-    }
 
-    // Xiaomi MiMo format detection (must come before Hermes 2 Pro)
-    if (src.find("<tools>") != std::string::npos &&
-        src.find("# Tools") != std::string::npos &&
-        src.find("</tools>") != std::string::npos &&
-        src.find("<tool_calls>") != std::string::npos &&
-        src.find("</tool_calls>") != std::string::npos &&
-        src.find("<tool_response>") != std::string::npos) {
-        return common_chat_params_init_xiaomi_mimo(tmpl, params);
+        gemma4_model_turn_builder builder(messages, i);
+        result.push_back(builder.build());
+        i = builder.pos;
     }
 
-    // MiroThinker format detection (must come before Hermes 2 Pro)
-    if (src.find("</use_mcp_tool>") != std::string::npos &&
-        src.find("</server_name>") != std::string::npos &&
-        src.find("</tool_name>") != std::string::npos &&
-        src.find("</arguments>") != std::string::npos) {
-        return common_chat_params_init_mirothinker(tmpl, params);
+    messages = result;
+}
+
+static void func_args_not_string(json & messages) {
+    GGML_ASSERT(messages.is_array());
+    for (auto & message : messages) {
+        if (message.contains("tool_calls")) {
+            for (auto & tool_call : message["tool_calls"]) {
+                if (tool_call.contains("function") && tool_call["function"].contains("arguments")) {
+                    auto & args = tool_call["function"]["arguments"];
+                    if (args.is_string()) {
+                        try {
+                            args = json::parse(args.get<std::string>());
+                        } catch (const std::exception & e) {
+                            throw std::runtime_error("Failed to parse tool call arguments as JSON: " + std::string(e.what()));
+                        }
+                    }
+                }
+            }
+        }
     }
+}
+
+}
 
-    // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
-        return common_chat_params_init_hermes_2_pro(tmpl, params);
+static json common_chat_extra_context() {
+    json ctx = json::object();
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
+    std::string datetime_str = format_time(now, "%b %d %Y");
+    std::string date_str = format_time(now, "%d %b %Y");
+    ctx["datetime"] = datetime_str;
+    ctx["date_string"] = date_str;
+    return ctx;
+}
+
+std::optional<common_chat_params> common_chat_try_specialized_template(
+        const common_chat_template &          tmpl,
+        const std::string &                   src,
+        autoparser::generation_params & params) {
+    // Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
+    // Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
+    if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&
+        src.find("[ARGS]") != std::string::npos && src.find("[CALL_ID]") == std::string::npos) {
+        LOG_DBG("Using specialized template: Ministral/Magistral Large 3\n");
+        return common_chat_params_init_ministral_3(tmpl, params);
     }
 
-    // GPT-OSS
+    // GPT-OSS - has unique channel-based structure that needs dedicated handler
     if (src.find("<|channel|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: GPT-OSS\n");
         return common_chat_params_init_gpt_oss(tmpl, params);
     }
 
-    // Seed-OSS
-    if (src.find("<seed:think>") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_seed_oss(tmpl, params, inputs);
-    }
-
-    // Nemotron v2
-    if (src.find("<SPECIAL_10>") != std::string::npos) {
-        return common_chat_params_init_nemotron_v2(tmpl, params);
+    // Functionary v3.2 - uses recipient-based format with >>>recipient\n{content}
+    // Detection: template has ">>>all" for content and ">>>" prefix for tool calls
+    if (src.find(">>>all") != std::string::npos && src.find(">>>${recipient}") != std::string::npos) {
+        LOG_DBG("Using specialized template: Functionary v3.2\n");
+        return common_chat_params_init_functionary_v3_2(tmpl, params);
     }
 
-    // Apertus format detection
-    if (src.find("<|system_start|>") != std::string::npos && src.find("<|tools_prefix|>") != std::string::npos) {
-        return common_chat_params_init_apertus(tmpl, params);
+    // Kimi K2 Thinking - uses unique tool call ID format: functions.<name>:<index>
+    // Detection: template has "<|tool_calls_section_begin|>" and "functions." prefix in tool call IDs
+    if (src.find("<|tool_calls_section_begin|>") != std::string::npos &&
+        src.find("<|tool_call_begin|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: Kimi K2 Thinking\n");
+        return common_chat_params_init_kimi_k2(tmpl, params);
     }
 
-    // LFM2 (w/ tools)
-    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&
-        src.find("]<|tool_list_end|>") != std::string::npos) {
+    // MiroThinker - uses MCP style toolcalling <use_mcp_tool> ... </use_mcp_tool>
+    // Detection: template has "</use_mcp_tool>" and "</server_name>"
+    if (src.find("</use_mcp_tool>") != std::string::npos &&
+        src.find("</server_name>") != std::string::npos) {
+        LOG_DBG("Using specialized template: MiroThinker\n");
+        return common_chat_params_init_mirothinker(tmpl, params);      
+    }
+
+    // LFM2 - uses <|tool_list_start|>/<|tool_list_end|> markers and <|tool_call_start|>[name(args)]<|tool_call_end|> format
+    // Detection: template has "<|tool_list_start|>" and "<|tool_list_end|>" markers
+    // LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
+    // and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
+    if (src.find("<|tool_list_start|>") != std::string::npos &&
+        src.find("<|tool_list_end|>") != std::string::npos) {
+        LOG_DBG("Using specialized template: LFM2\n");
         return common_chat_params_init_lfm2(tmpl, params);
     }
 
-    // MiniMax-M2 format detection
-    if (src.find("]~!b[") != std::string::npos && src.find("]~b]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_minimax_m2(tmpl, params);
+    // LFM2.5 format detection: template uses plain "List of tools: [...]" with no special tokens
+    if (src.find("List of tools: [") != std::string::npos &&
+        src.find("<|tool_list_start|>") == std::string::npos) {
+        LOG_DBG("Using specialized template: LFM2.5\n");
+        return common_chat_params_init_lfm2_5(tmpl, params);
     }
 
-    // Kimi K2 format detection
-    if (src.find("<|im_system|>tool_declare<|im_middle|>") != std::string::npos &&
-        src.find("<|tool_calls_section_begin|>") != std::string::npos &&
-        src.find("## Return of") != std::string::npos) {
-        return common_chat_params_init_kimi_k2(tmpl, params);
+    // GigaChatV3 format detection
+    if (src.find("<|role_sep|>") != std::string::npos &&
+        src.find("<|message_sep|>") != std::string::npos &&
+        src.find("<|function_call|>") == std::string::npos) {
+        LOG_DBG("Using specialized template: GigaChatV3\n");
+        return common_chat_params_init_gigachat_v3(tmpl, params);
     }
 
-    // Apriel 1.5 format detection
-    if (src.find("<thinking>") != std::string::npos &&
-        src.find("</thinking>") != std::string::npos &&
-        src.find("<available_tools>") != std::string::npos &&
-        src.find("<|assistant|>") != std::string::npos &&
-        src.find("<|tool_result|>") != std::string::npos &&
-        src.find("<tool_calls>[") != std::string::npos &&
-        src.find("]</tool_calls>") != std::string::npos) {
-        return common_chat_params_init_apriel_1_5(tmpl, params);
+    // DeepSeek V3.2 format detection: template defines dsml_token and uses it for tool calls.
+    // The template source contains the token as a variable assignment, not as a literal in markup.
+    if (src.find("dsml_token") != std::string::npos &&
+        src.find("function_calls") != std::string::npos &&
+        src.find("DSML") != std::string::npos) {
+        LOG_DBG("Using specialized template: DeepSeek V3.2\n");
+        return common_chat_params_init_deepseek_v3_2(tmpl, params);
     }
 
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
+    // Gemma4 format detection
+    if (src.find("'<|tool_call>call:'") != std::string::npos) {
+        if (src.find("{#- OpenAI Chat Completions:") == std::string::npos) {
+            // apply workarounds if using the older gemma4 templates
+            LOG_WRN("%s: detected an outdated gemma4 chat template, applying compatibility workarounds. "
+                    "Consider updating to the official template.\n", __func__);
+            workaround::convert_tool_responses_gemma4(params.messages);
+        }
+        return common_chat_params_init_gemma4(tmpl, params);
     }
 
-    // Use generic handler when mixing tools + JSON schema.
-    // TODO: support that mix in handlers below.
-    if ((params.tools.is_array() && params.json_schema.is_object())) {
-        return common_chat_params_init_generic(tmpl, params);
-    }
+    return std::nullopt;
+}
 
-    // Functionary prepends "all\n" to plain content outputs, so we use its handler in all cases.
-    if (src.find(">>>all") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_2(tmpl, params);
+static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates *        tmpls,
+                                                            const struct common_chat_templates_inputs & inputs) {
+    autoparser::generation_params params;
+    params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
+    const auto & tmpl =
+        params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use : *tmpls->template_default;
+    const auto & src        = tmpl.source();
+    const auto & caps       = tmpl.original_caps();
+    params.messages         = render_message_to_json(inputs.messages, tmpl.original_caps());
+    params.tool_choice      = inputs.tool_choice;
+    params.reasoning_format = inputs.reasoning_format;
+    params.enable_thinking  = inputs.enable_thinking;
+    params.grammar          = inputs.grammar;
+    params.now              = inputs.now;
+    params.add_bos          = tmpls->add_bos;
+    params.add_eos          = tmpls->add_eos;
+
+    if (src.find("<|channel|>") == std::string::npos) {
+        // map developer to system for all models except for GPT-OSS
+        workaround::map_developer_role_to_system(params.messages);
     }
 
-    // Firefunction v2 requires datetime and functions in the context even w/o tools, so we also use its handler in all cases.
-    if (src.find(" functools[") != std::string::npos) {
-        return common_chat_params_init_firefunction_v2(tmpl, params);
+    if (!tmpl.original_caps().supports_system_role) {
+        workaround::system_message_not_supported(params.messages);
     }
 
-    // Functionary v3.1 (w/ tools)
-    if (src.find("<|start_header_id|>") != std::string::npos
-        && src.find("<function=") != std::string::npos) {
-        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
+    if (tmpl.original_caps().supports_tool_calls) {
+        // some templates will require the content field in tool call messages
+        // to still be non-null, this puts an empty string everywhere where the
+        // content field is null
+        workaround::requires_non_null_content(params.messages);
     }
 
-    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
-    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
-        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
+    if (tmpl.original_caps().supports_object_arguments) {
         workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
     }
 
-    // Ministral/Mistral Large 3
-    if (src.find("[SYSTEM_PROMPT]") != std::string::npos &&
-        src.find("[TOOL_CALLS]") != std::string::npos &&
-        src.find("[ARGS]") != std::string::npos) {
-        return common_chat_params_init_ministral_3(tmpl, params);
-    }
+    params.add_generation_prompt = false;
+    std::string no_gen_prompt    = common_chat_template_direct_apply_impl(tmpl, params);
+    params.add_generation_prompt = true;
+    std::string gen_prompt       = common_chat_template_direct_apply_impl(tmpl, params);
+    auto        diff             = calculate_diff_split(no_gen_prompt, gen_prompt);
+    params.generation_prompt     = diff.right + diff.suffix;
 
-    if (src.find("[THINK]") != std::string::npos && src.find("[/THINK]") != std::string::npos) {
-        return common_chat_params_init_magistral(tmpl, params);
-    }
+    params.add_generation_prompt = inputs.add_generation_prompt;
 
-    // Solar Open
-    if (src.find("<|tool_response:begin|>") != std::string::npos &&
-        src.find("<|tool_response:name|>") != std::string::npos &&
-        src.find("<|tool_response:result|>") != std::string::npos) {
-        return common_chat_params_init_solar_open(tmpl, params);
+    params.extra_context = common_chat_extra_context();
+    for (auto el : inputs.chat_template_kwargs) {
+        params.extra_context[el.first] = json::parse(el.second);
     }
 
-    // TranslateGemma
-    if (src.find("[source_lang_code]") != std::string::npos &&
-        src.find("[target_lang_code]") != std::string::npos) {
-        return common_chat_params_init_translate_gemma(tmpl, params);
+    if (!inputs.json_schema.empty()) {
+        params.json_schema = json::parse(inputs.json_schema);
     }
 
-    // Plain handler (no tools)
-    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-        return common_chat_params_init_without_tools(tmpl, params);
+    params.parallel_tool_calls = inputs.parallel_tool_calls;
+
+    if (params.tools.is_array()) {
+        if (params.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && !params.grammar.empty()) {
+            throw std::runtime_error("Cannot specify grammar with tools");
+        }
+        if (caps.supports_tool_calls && !caps.supports_tools) {
+            LOG_WRN(
+                "Template supports tool calls but does not natively describe tools. The fallback behaviour used may "
+                "produce bad results, inspect prompt w/ --verbose & consider overriding the template.\n");
+        }
+    }
+
+    if (inputs.force_pure_content) {
+        LOG_WRN("Forcing pure content template, will not render reasoning or tools separately.");
+        // Create the result structure
+        common_chat_params data;
+        auto params_copy               = params;
+        params_copy.reasoning_format   = COMMON_REASONING_FORMAT_NONE;
+        data.prompt                    = common_chat_template_direct_apply_impl(tmpl, params_copy);
+        data.format                    = COMMON_CHAT_FORMAT_PEG_NATIVE;
+        data.generation_prompt         = params.generation_prompt;
+        auto parser                    = build_chat_peg_parser([&params](common_chat_peg_builder &p) {
+            return p.prefix(params.generation_prompt) << p.content(p.rest());
+        });
+        data.parser                    = parser.save();
+        return data;
     }
 
-    // Mistral Nemo (w/ tools)
-    if (src.find("[TOOL_CALLS]") != std::string::npos) {
-        workaround::func_args_not_string(params.messages);
-        return common_chat_params_init_mistral_nemo(tmpl, params);
+    if (auto result = common_chat_try_specialized_template(tmpl, src, params)) {
+        result->generation_prompt = params.generation_prompt;
+        return *result;
     }
 
-    // Generic fallback
-    workaround::func_args_not_string(params.messages);
-    workaround::use_generic_schema(params.messages);
-    workaround::move_tool_calls_to_content(params.messages);
-    return common_chat_params_init_generic(tmpl, params);
+    try {
+        LOG_DBG("%s: using differential autoparser\n", __func__);
+        struct autoparser::autoparser autoparser;
+        autoparser.analyze_template(tmpl);
+        auto auto_params = autoparser::peg_generator::generate_parser(tmpl, params, autoparser);
+        auto_params.supports_thinking = autoparser.reasoning.mode != autoparser::reasoning_mode::NONE;
+        if (auto_params.supports_thinking) {
+            auto_params.thinking_start_tag = autoparser.reasoning.start;
+            auto_params.thinking_end_tag   = autoparser.reasoning.end;
+        }
+        auto_params.generation_prompt = params.generation_prompt;
+        common_peg_arena arena;
+        arena.load(auto_params.parser);
+        LOG_DBG("%s: generated parser:\n%s\n\nparser generation prompt: %s\n", __func__, arena.dump(arena.root()).c_str(), auto_params.generation_prompt.c_str());
+        return auto_params;
+    } catch (const std::exception & e) {
+        throw std::invalid_argument(std::string("Unable to generate parser for this template. Automatic parser generation failed: ") + e.what());
+    }
 }
 
 // Legacy template route (adhoc C++ implementation of known templates), forward to llama_chat_apply_template.
-static common_chat_params common_chat_templates_apply_legacy(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
-    size_t alloc_size = 0;
+static common_chat_params common_chat_templates_apply_legacy(const struct common_chat_templates *        tmpls,
+                                                             const struct common_chat_templates_inputs & inputs) {
+    size_t                          alloc_size = 0;
     std::vector<llama_chat_message> chat;
-    std::vector<std::string> contents;
+    std::vector<std::string>        contents;
 
     for (const auto & msg : inputs.messages) {
         auto content = msg.content;
@@ -3359,25 +2392,27 @@ static common_chat_params common_chat_templates_apply_legacy(
                 continue;
             }
             if (!content.empty()) {
-                content += "\n";;
+                content += "\n";
+                ;
             }
             content += part.text;
         }
         contents.emplace_back(std::move(content));
     }
     for (size_t i = 0; i < contents.size(); ++i) {
-        const auto & msg = inputs.messages[i];
+        const auto & msg     = inputs.messages[i];
         const auto & content = contents[i];
-        chat.push_back({msg.role.c_str(), content.c_str()});
+        chat.push_back({ msg.role.c_str(), content.c_str() });
         size_t msg_size = msg.role.size() + content.size();
-        alloc_size += msg_size + (msg_size / 4); // == msg_size * 1.25 but avoiding float ops
+        alloc_size += msg_size + (msg_size / 4);  // == msg_size * 1.25 but avoiding float ops
     }
 
     std::vector<char> buf(alloc_size);
 
     // run the first time to get the total output length
     const auto & src = tmpls->template_default->source();
-    int32_t res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+    int32_t      res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt,
+                                                 buf.data(), buf.size());
 
     // error: chat template is not supported
     if (res < 0) {
@@ -3389,7 +2424,8 @@ static common_chat_params common_chat_templates_apply_legacy(
     // if it turns out that our buffer is too small, we resize it
     if ((size_t) res > buf.size()) {
         buf.resize(res);
-        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(), buf.size());
+        res = llama_chat_apply_template(src.c_str(), chat.data(), chat.size(), inputs.add_generation_prompt, buf.data(),
+                                        buf.size());
     }
 
     // for safety, we check the result again
@@ -3407,14 +2443,90 @@ static common_chat_params common_chat_templates_apply_legacy(
     return params;
 }
 
-common_chat_params common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs)
-{
+common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                               const struct common_chat_templates_inputs & inputs) {
     GGML_ASSERT(tmpls != nullptr);
-    return inputs.use_jinja
-        ? common_chat_templates_apply_jinja(tmpls, inputs)
-        : common_chat_templates_apply_legacy(tmpls, inputs);
+    return inputs.use_jinja ? common_chat_templates_apply_jinja(tmpls, inputs) :
+                              common_chat_templates_apply_legacy(tmpls, inputs);
+}
+
+common_chat_msg common_chat_parse(const std::string &               input,
+                                  bool                              is_partial,
+                                  const common_chat_parser_params & params) {
+    return common_chat_peg_parse(params.parser, input, is_partial, params);
+}
+
+common_chat_msg common_chat_peg_parse(const common_peg_arena &          src_parser,
+                                      const std::string &               input,
+                                      bool                              is_partial,
+                                      const common_chat_parser_params & params) {
+    const common_peg_arena & parser = src_parser.empty() ?
+        build_chat_peg_parser([](common_chat_peg_builder & p) { return p.content(p.rest()) + p.end(); }) :
+        src_parser;
+
+    if (src_parser.empty()) {
+        LOG_DBG("No parser definition detected, assuming pure content parser.");
+    }
+
+    const std::string effective_input = params.generation_prompt.empty()
+        ? input
+        : params.generation_prompt + input;
+
+    LOG_DBG("Parsing PEG input with format %s: %s\n", common_chat_format_name(params.format), effective_input.c_str());
+
+    common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_LENIENT;
+    if (params.debug) {
+        flags |= COMMON_PEG_PARSE_FLAG_DEBUG;
+    }
+
+    common_peg_parse_context ctx(effective_input, flags);
+    auto result = parser.parse(ctx);
+
+    if (result.fail()) {
+        // During partial parsing, return partial results if any AST nodes were captured
+        // This allows streaming to work correctly for formats like FUNC_MARKDOWN_CODE_BLOCK
+        if (is_partial && result.end > 0) {
+            // Try to extract any partial results from what was successfully parsed
+            common_chat_msg msg;
+            msg.role = "assistant";
+            std::unique_ptr<common_chat_peg_mapper> mapper;
+            if (params.format == COMMON_CHAT_FORMAT_PEG_GEMMA4) {
+                mapper = std::make_unique<common_chat_peg_gemma4_mapper>(msg);
+            } else {
+                mapper = std::make_unique<common_chat_peg_mapper>(msg);
+            }
+            mapper->from_ast(ctx.ast, result);
+
+            if (ctx.is_debug()) {
+                fprintf(stderr, "\nAST for partial parse (fail):\n%s\n", ctx.ast.dump().c_str());
+                fflush(stderr);
+            }
+            return msg;
+        }
+        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
+                                 effective_input.substr(result.end));
+    }
+
+    common_chat_msg msg;
+    msg.role = "assistant";
+
+    std::unique_ptr<common_chat_peg_mapper> mapper;
+    if (params.format == COMMON_CHAT_FORMAT_PEG_GEMMA4) {
+        mapper = std::make_unique<common_chat_peg_gemma4_mapper>(msg);
+    } else {
+        mapper = std::make_unique<common_chat_peg_mapper>(msg);
+    }
+    mapper->from_ast(ctx.ast, result);
+
+    if (ctx.is_debug()) {
+        fprintf(stderr, "\nAST for %s parse:\n%s\n", is_partial ? "partial" : "full", ctx.ast.dump().c_str());
+        fflush(stderr);
+    }
+
+    if (!is_partial) {
+        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({ msg }).at(0).dump().c_str());
+    }
+    return msg;
 }
 
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates) {
@@ -3422,3 +2534,4 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
     GGML_ASSERT(chat_templates->template_default != nullptr);
     return chat_templates->template_default->caps.to_map();
 }
+
diff --git a/common/chat.h b/common/chat.h
index 0c40dd18d1..03eb0cc07a 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -4,16 +4,27 @@
 
 #include "common.h"
 #include "peg-parser.h"
-#include <functional>
+#include "jinja/parser.h"
+#include "jinja/runtime.h"
+#include "jinja/caps.h"
+
+#include "nlohmann/json_fwd.hpp"
+
 #include <chrono>
+#include <functional>
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
 
-#include <nlohmann/json_fwd.hpp>
+using chat_template_caps = jinja::caps;
+using json = nlohmann::ordered_json;
 
 struct common_chat_templates;
 
+namespace autoparser {
+struct generation_params;
+}  // namespace autoparser
+
 struct common_chat_tool_call {
     std::string name;
     std::string arguments;
@@ -38,21 +49,53 @@ struct common_chat_msg_content_part {
     }
 };
 
+struct common_chat_template {
+    jinja::program prog;
+    std::string bos_tok;
+    std::string eos_tok;
+    std::string src;
+    chat_template_caps caps;
+
+    common_chat_template(const std::string & src, const std::string & bos_token, const std::string & eos_token) {
+        jinja::lexer lexer;
+        auto lexer_res = lexer.tokenize(src);
+        this->prog = jinja::parse_from_tokens(lexer_res);
+
+        this->src = lexer_res.source;
+        this->bos_tok = bos_token;
+        this->eos_tok = eos_token;
+
+        this->caps = jinja::caps_get(prog);
+        // LOG_INF("%s: caps:\n%s\n", __func__, this->caps.to_string().c_str());
+    }
+
+    const std::string & source() const { return src; }
+    const std::string & bos_token() const { return bos_tok; }
+    const std::string & eos_token() const { return eos_tok; }
+
+    chat_template_caps original_caps() const {
+        return caps;
+    }
+};
+
 struct common_chat_msg {
-    std::string role;
-    std::string content;
-    std::vector<common_chat_msg_content_part> content_parts = {};
-    std::vector<common_chat_tool_call> tool_calls = {};
-    std::string reasoning_content;
-    std::string tool_name;
-    std::string tool_call_id;
+    std::string                               role;
+    std::string                               content;
+    std::vector<common_chat_msg_content_part> content_parts;
+    std::vector<common_chat_tool_call>        tool_calls;
+    std::string                               reasoning_content;
+    std::string                               tool_name;
+    std::string                               tool_call_id;
 
     nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
 
     bool empty() const {
-        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
+        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
+               tool_name.empty() && tool_call_id.empty();
     }
-    void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
+
+    void set_tool_call_ids(std::vector<std::string> &           ids_cache,
+                           const std::function<std::string()> & gen_tool_call_id) {
         for (auto i = 0u; i < tool_calls.size(); i++) {
             if (ids_cache.size() <= i) {
                 auto id = tool_calls[i].id;
@@ -64,32 +107,28 @@ struct common_chat_msg {
             tool_calls[i].id = ids_cache[i];
         }
     }
+
     bool operator==(const common_chat_msg & other) const {
-        return role == other.role
-            && content == other.content
-            && content_parts == other.content_parts
-            && tool_calls == other.tool_calls
-            && reasoning_content == other.reasoning_content
-            && tool_name == other.tool_name
-            && tool_call_id == other.tool_call_id;
-    }
-    bool operator!=(const common_chat_msg & other) const {
-        return !(*this == other);
+        return role == other.role && content == other.content && content_parts == other.content_parts &&
+               tool_calls == other.tool_calls && reasoning_content == other.reasoning_content &&
+               tool_name == other.tool_name && tool_call_id == other.tool_call_id;
     }
+
+    bool operator!=(const common_chat_msg & other) const { return !(*this == other); }
 };
 
 struct common_chat_msg_diff {
-    std::string reasoning_content_delta;
-    std::string content_delta;
-    size_t tool_call_index = std::string::npos;
+    std::string           reasoning_content_delta;
+    std::string           content_delta;
+    size_t                tool_call_index = std::string::npos;
     common_chat_tool_call tool_call_delta;
 
-    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
+    static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv,
+                                                           const common_chat_msg & msg_new);
 
     bool operator==(const common_chat_msg_diff & other) const {
-        return content_delta == other.content_delta
-        && tool_call_index == other.tool_call_index
-        && tool_call_delta == other.tool_call_delta;
+        return content_delta == other.content_delta && tool_call_index == other.tool_call_index &&
+               tool_call_delta == other.tool_call_delta;
     }
 };
 
@@ -107,79 +146,63 @@ enum common_chat_tool_choice {
 
 enum common_chat_format {
     COMMON_CHAT_FORMAT_CONTENT_ONLY,
-    COMMON_CHAT_FORMAT_GENERIC,
-    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
-    COMMON_CHAT_FORMAT_MAGISTRAL,
-    COMMON_CHAT_FORMAT_LLAMA_3_X,
-    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
-    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-    COMMON_CHAT_FORMAT_HERMES_2_PRO,
-    COMMON_CHAT_FORMAT_COMMAND_R7B,
-    COMMON_CHAT_FORMAT_GRANITE,
-    COMMON_CHAT_FORMAT_GPT_OSS,
-    COMMON_CHAT_FORMAT_SEED_OSS,
-    COMMON_CHAT_FORMAT_NEMOTRON_V2,
-    COMMON_CHAT_FORMAT_APERTUS,
-    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
-    COMMON_CHAT_FORMAT_GLM_4_5,
-    COMMON_CHAT_FORMAT_MINIMAX_M2,
-    COMMON_CHAT_FORMAT_KIMI_K2,
-    COMMON_CHAT_FORMAT_QWEN3_CODER_XML,
-    COMMON_CHAT_FORMAT_APRIEL_1_5,
-    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
-    COMMON_CHAT_FORMAT_MIROTHINKER,
 
     // These are intended to be parsed by the PEG parser
     COMMON_CHAT_FORMAT_PEG_SIMPLE,
     COMMON_CHAT_FORMAT_PEG_NATIVE,
-    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,
+    COMMON_CHAT_FORMAT_PEG_GEMMA4,
 
-    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
+    COMMON_CHAT_FORMAT_COUNT,  // Not a format, just the # formats
 };
 
 struct common_chat_templates_inputs {
-    std::vector<common_chat_msg> messages;
-    std::string grammar;
-    std::string json_schema;
-    bool add_generation_prompt = true;
-    bool use_jinja = true;
+    std::vector<common_chat_msg>          messages;
+    std::string                           grammar;
+    std::string                           json_schema;
+    bool                                  add_generation_prompt = true;
+    bool                                  use_jinja             = true;
     // Parameters below only supported when use_jinja is true
-    std::vector<common_chat_tool> tools;
-    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
-    bool parallel_tool_calls = false;
-    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
-    bool enable_thinking = true;
-    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
-    std::map<std::string, std::string> chat_template_kwargs;
-    bool add_bos = false;
-    bool add_eos = false;
-    bool use_peg = false;
+    std::vector<common_chat_tool>         tools;
+    common_chat_tool_choice               tool_choice         = COMMON_CHAT_TOOL_CHOICE_AUTO;
+    bool                                  parallel_tool_calls = false;
+    common_reasoning_format               reasoning_format    = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
+    bool                                  enable_thinking     = true;
+    std::chrono::system_clock::time_point now                 = std::chrono::system_clock::now();
+    std::map<std::string, std::string>    chat_template_kwargs;
+    bool                                  add_bos = false;
+    bool                                  add_eos = false;
+    bool                                  force_pure_content = false;
 };
 
 struct common_chat_params {
     common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
     std::string                         prompt;
     std::string                         grammar;
-    bool                                grammar_lazy = false;
-    bool                                thinking_forced_open = false;
+    bool                                grammar_lazy         = false;
+    std::string                         generation_prompt;
+    bool                                supports_thinking    = false;
+    std::string                         thinking_start_tag;  // e.g., "<think>"
+    std::string                         thinking_end_tag;    // e.g., "</think>"
     std::vector<common_grammar_trigger> grammar_triggers;
     std::vector<std::string>            preserved_tokens;
     std::vector<std::string>            additional_stops;
     std::string                         parser;
 };
 
-struct common_chat_syntax {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
+struct common_chat_parser_params {
+    common_chat_format      format               = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    common_reasoning_format reasoning_format     = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
     // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-    common_peg_arena         parser                = {};
+    bool                    reasoning_in_content = false;
+    std::string             generation_prompt;
+    bool                    parse_tool_calls     = true;
+    bool                    debug                = false;  // Enable debug output for PEG parser
+    common_peg_arena        parser               = {};
+    common_chat_parser_params() = default;
+    common_chat_parser_params(const common_chat_params & chat_params) {
+        format  = chat_params.format;
+        generation_prompt = chat_params.generation_prompt;
+    }
 };
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
@@ -187,42 +210,42 @@ bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
 
 void common_chat_templates_free(struct common_chat_templates * tmpls);
 
-struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };
+struct common_chat_templates_deleter {
+    void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); }
+};
 
 typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;
 
-common_chat_templates_ptr common_chat_templates_init(
-                                    const struct llama_model * model,
-                                           const std::string & chat_template_override,
-                                           const std::string & bos_token_override = "",
-                                           const std::string & eos_token_override = "");
+common_chat_templates_ptr common_chat_templates_init(const struct llama_model * model,
+                                                     const std::string &        chat_template_override,
+                                                     const std::string &        bos_token_override = "",
+                                                     const std::string &        eos_token_override = "");
 
-bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
-std::string  common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
+bool        common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
+std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");
 
-struct common_chat_params      common_chat_templates_apply(
-    const struct common_chat_templates * tmpls,
-    const struct common_chat_templates_inputs & inputs);
+struct common_chat_params common_chat_templates_apply(const struct common_chat_templates *        tmpls,
+                                                      const struct common_chat_templates_inputs & inputs);
 
 // Format single message, while taking into account the position of that message in chat history
-std::string common_chat_format_single(
-        const struct common_chat_templates * tmpls,
-        const std::vector<common_chat_msg> & past_msg,
-        const common_chat_msg & new_msg,
-        bool add_ass,
-        bool use_jinja);
+std::string common_chat_format_single(const struct common_chat_templates * tmpls,
+                                      const std::vector<common_chat_msg> & past_msg,
+                                      const common_chat_msg &              new_msg,
+                                      bool                                 add_ass,
+                                      bool                                 use_jinja);
 
 // Returns an example of formatted chat
-std::string common_chat_format_example(
-    const struct common_chat_templates * tmpls,
-    bool use_jinja,
-    const std::map<std::string, std::string> & chat_template_kwargs);
+std::string common_chat_format_example(const struct common_chat_templates *       tmpls,
+                                       bool                                       use_jinja,
+                                       const std::map<std::string, std::string> & chat_template_kwargs);
 
-const char*               common_chat_format_name(common_chat_format format);
-const char*               common_reasoning_format_name(common_reasoning_format format);
-common_reasoning_format   common_reasoning_format_from_name(const std::string& format);
-common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
-common_chat_msg           common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_syntax & syntax);
+const char *    common_chat_format_name(common_chat_format format);
+common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & params);
+common_chat_msg common_chat_peg_parse(const common_peg_arena & src_parser, const std::string & input, bool is_partial, const common_chat_parser_params & params);
+
+// used by arg and server
+const char *            common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
 
@@ -241,3 +264,12 @@ nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_
 
 // get template caps, useful for reporting to server /props endpoint
 std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
+
+std::string common_chat_template_direct_apply(
+    const common_chat_template & tmpl,
+    const autoparser::generation_params & inputs);
+
+std::optional<common_chat_params> common_chat_try_specialized_template(
+        const common_chat_template &          tmpl,
+        const std::string &                   src,
+        autoparser::generation_params & params);
diff --git a/common/common.cpp b/common/common.cpp
index d95e100863..f4e75e0cb9 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -407,6 +407,18 @@ void gpt_params_handle_model_default(gpt_params & params) {
     }
 }
 
+static bool is_truthy(const std::string & value) {
+    return value == "on" || value == "enabled" || value == "true" || value == "1";
+}
+
+static bool is_falsey(const std::string & value) {
+    return value == "off" || value == "disabled" || value == "false" || value == "0";
+}
+
+static bool is_autoy(const std::string & value) {
+    return value == "auto" || value == "-1";
+}
+
 bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
     bool invalid_param = false;
     std::string arg;
@@ -1864,7 +1876,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     }
     if (arg == "--grammar") {
         CHECK_ARG
-        sparams.grammar = argv[i];
+            sparams.grammar = { COMMON_GRAMMAR_TYPE_USER, argv[i] };
         return true;
     }
     if (arg == "--grammar-file") {
@@ -1875,16 +1887,12 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        std::copy(
-            std::istreambuf_iterator<char>(file),
-            std::istreambuf_iterator<char>(),
-            std::back_inserter(sparams.grammar)
-        );
+        sparams.grammar = {COMMON_GRAMMAR_TYPE_USER, read_file(argv[i])};
         return true;
     }
     if (arg == "-j" || arg == "--json-schema") {
         CHECK_ARG
-        sparams.grammar = json_schema_to_grammar(json::parse(argv[i]));
+        sparams.grammar = { COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT,  json_schema_to_grammar(json::parse(argv[i]))};
         return true;
     }
 
@@ -2063,7 +2071,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         return true;
     }
     if (arg == "--peg") {
-        params.use_peg = true;
         return true;
     }
     if (arg == "--chat-template-kwargs") {
@@ -2071,6 +2078,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         std::string value = argv[i];
         auto parsed = json::parse(value);
         for (const auto& item : parsed.items()) {
+            if (item.key() == "enable_thinking") {
+                LOG_WRN("Setting 'enable_thinking' via --chat-template-kwargs is deprecated. "
+                    "Use --reasoning on / --reasoning off instead.\n");
+            }
             params.default_template_kwargs[item.key()] = item.value().dump();
         }
         return true;
@@ -2081,11 +2092,43 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.reasoning_format = common_reasoning_format_from_name(value);
         return true;
     }
+    if (arg == "-rea" || arg == "--reasoning") {
+        CHECK_ARG
+        std::string value = argv[i];
+        if (is_truthy(value)) {
+            params.enable_reasoning = 1;
+            params.default_template_kwargs["enable_thinking"] = "true";
+        } else if (is_falsey(value)) {
+            params.enable_reasoning = 0;
+            params.default_template_kwargs["enable_thinking"] = "false";
+        } else if (is_autoy(value)) {
+            params.enable_reasoning = -1;
+        } else {
+            throw std::invalid_argument(
+                string_format("error: unknown value for --reasoning: '%s'\n", value.c_str()));
+        }
+        return true;
+    }
+    if (arg == "--reasoning-budget-message") {
+        CHECK_ARG
+        std::string value = argv[i];
+        params.reasoning_budget_message = value;
+        return true;
+    }
+    if (arg == "--skip-chat-parsing") {
+        CHECK_ARG
+        params.force_pure_content_parser = true;
+        return true;
+    }
     if (arg == "--no-prefill-assistant") {
         CHECK_ARG
         params.prefill_assistant = false;
         return true;
     }
+    if (arg == "--parallel-tool-calls") {
+        params.parallel_tool_calls = true;
+        return true;
+    }
     if (arg == "--slot-prompt-similarity" || arg == "-sps") {
         CHECK_ARG
         params.slot_prompt_similarity = std::stof(argv[i]);
@@ -2490,9 +2533,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
                                                                         "if suffix/prefix are specified, template will be disabled\n"
                                                                         "only commonly used templates are accepted:\n"
                                                                         "https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
-    options.push_back({ "main",        "       --peg",
-                                                                    "use peg parser for qwen3.5 models.\n"
-                                                                    "https://github.com/ikawrakow/ik_llama.cpp/pull/1490" });
+    options.push_back({ "main",        "       --parallel-tool-calls",  "enable parallel tool calls\n" });
     options.push_back({ "main",        "       --chat-template JINJA_TEMPLATE",
                                                                         "use jinja template for chat (default: disabled)\n" });
     options.push_back({ "main",        "       --chat-template-file file_with_JINJA_TEMPLATE",
@@ -2503,20 +2544,25 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
                         "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
                         "- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`\n"
                         "(default: none)", });
-    options.push_back({ "main",      "       --chat-template-kwargs JSON",  "sets additional params for the json template parser"});
-    options.push_back({ "main",      "       --reasoning-budget N",  "controls the amount of thinking allowed.\n"
-                                                                                                     "currently only one of: -1 for unrestricted thinking budget, or 0 to disable thinking"
-                                                                                                      "(default: -1)" });
-    options.push_back({ "main",      "       --reasoning-tokens FORMAT",     "exclude reasoning tokens to select the slot more accurately.\n"
+    options.push_back({ "main",        "-rea,  --reasoning",  "[on|off|auto]"
+                                                                 "Use reasoning/thinking in the chat ('on', 'off', or 'auto', default: 'auto' (detect from template))" });
+    options.push_back({ "main",        "       --chat-template-kwargs JSON",  "sets additional params for the json template parser"});
+
+    options.push_back({ "main",        "       --reasoning-budget N",  "token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)" });
+    options.push_back({ "main",        "       --reasoning-tokens FORMAT",     "exclude reasoning tokens to select the slot more accurately.\n"
 						                                                                                            "none: include all tokens\n"
                                                                                                                     "auto: exclude all tokens between <think> and </think>\n"
 						                                                                                            "Or comma separated start and end tokens such as [THINK],[/THINK]\n"
 						                                                                                            "(default: auto)" });
-
-    options.push_back({ "main",      "       --no-prefill-assistant",  "whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)\n"
+    options.push_back({ "main",        "       --reasoning-budget-message",  "message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)" });
+    options.push_back({ "main",        "       --skip-chat-parsing",  "force a pure content parser, even if a Jinja template is specified; model will output everything "
+            "in the content section, including any reasoning and/or tool calls (default: disabled)" });
+    options.push_back({ "main",        "       --reasoning-budget N",  "token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)" });
+    options.push_back({ "main",        "       --no-prefill-assistant",  "whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)\n"
             "when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled\n" });
+    options.push_back({ "main",        "       -ptc, --parallel-tool-calls", "enable parallel tool calls\n" });
     options.push_back({ "grammar" });
-    options.push_back({ "*",           "       --grammar GRAMMAR",      "BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", sparams.grammar.c_str() });
+    options.push_back({ "*",           "       --grammar GRAMMAR",      "BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", sparams.grammar.grammar.c_str() });
     options.push_back({ "*",           "       --grammar-file FNAME",   "file to read grammar from" });
     options.push_back({ "*",           "-j,    --json-schema SCHEMA",
                                                                         "JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\n"
@@ -4002,6 +4048,15 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }
 
+std::vector<llama_token> common_tokenize(
+    const struct llama_vocab * vocab,
+    const std::string & text,
+    bool   add_special,
+    bool   parse_special){
+
+    return llama_tokenize(vocab, text, add_special, parse_special);
+}
+
 std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::string piece;
     piece.resize(piece.capacity());  // using string internal cache, 15 bytes + '\n'
@@ -4463,7 +4518,7 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "escape: %s # default: false\n", params.escape ? "true" : "false");
     fprintf(stream, "file: # never logged, see prompt instead. Can still be specified for input.\n");
     fprintf(stream, "frequency_penalty: %f # default: 0.0 \n", sparams.penalty_freq);
-    yaml_dump_string_multiline(stream, "grammar", sparams.grammar.c_str());
+    yaml_dump_string_multiline(stream, "grammar", sparams.grammar.grammar.c_str());
     fprintf(stream, "grammar-file: # never logged, see grammar instead. Can still be specified for input.\n");
     fprintf(stream, "hellaswag: %s # default: false\n", params.hellaswag ? "true" : "false");
     fprintf(stream, "hellaswag_tasks: %zu # default: 400\n", params.hellaswag_tasks);
diff --git a/common/common.h b/common/common.h
index 3616da8a32..84121aebe0 100644
--- a/common/common.h
+++ b/common/common.h
@@ -27,6 +27,7 @@
 #include <tuple>
 #include <map>
 #include <sstream>
+#include <variant>
 
 #ifdef _WIN32
 #define DIRECTORY_SEPARATOR '\\'
@@ -149,7 +150,6 @@ enum common_speculative_type {
     COMMON_SPECULATIVE_TYPE_COUNT          // number of types, unknown type
 };
 
-
 struct common_params_model {
     std::string path        = ""; // model local path                                       // NOLINT
     std::string url         = ""; // model url to download                                  // NOLINT
@@ -416,14 +416,23 @@ struct gpt_params {
 
     std::string hostname      = "127.0.0.1";
     std::string public_path   = "";
+
+    // tool call and template
     std::string chat_template = "";
     bool use_jinja = false;                                                                                 // NOLINT
     bool use_peg = false;
     std::string system_prompt = "";
     bool enable_chat_template = true;
+    bool force_pure_content_parser = false;
+    bool parallel_tool_calls = false;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
+    int reasoning_budget = -1;
+    std::string reasoning_budget_message; // message injected before end tag when budget exhausted
+    std::map<std::string, std::string> default_template_kwargs;
+
     thinking_tokens think_tokens;
-    int reasoning_budget      = -1;
+
     bool prefill_assistant    = true;
     bool dry_run              = false;
 
@@ -432,7 +441,7 @@ struct gpt_params {
     std::string ssl_file_key  = "";
     std::string ssl_file_cert = "";
 
-    std::map<std::string, std::string> default_template_kwargs;
+
 
     // "advanced" endpoints are disabled by default for better security
     common_webui webui = COMMON_WEBUI_AUTO;
@@ -642,12 +651,18 @@ std::vector<llama_token> common_tokenize(
                         bool   add_special,
                         bool   parse_special = false);
 
-std::vector<llama_token> llama_tokenize(
+std::vector<llama_token> common_tokenize(
     const struct llama_vocab* vocab,
     const std::string& text,
     bool   add_special,
     bool   parse_special = false);
 
+std::vector<llama_token> llama_tokenize(
+    const struct llama_vocab * vocab,
+    const std::string & text,
+    bool   add_special,
+    bool   parse_special = false);
+
 // tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string common_token_to_piece(
diff --git a/common/jinja/caps.cpp b/common/jinja/caps.cpp
index dbaaed500a..ec207a53e8 100644
--- a/common/jinja/caps.cpp
+++ b/common/jinja/caps.cpp
@@ -1,3 +1,4 @@
+#include "log.h"
 #include "value.h"
 #include "runtime.h"
 #include "caps.h"
@@ -36,12 +37,16 @@ static void caps_try_execute(jinja::program & prog,
     auto tools = ctx.get_val("tools");
 
     bool success = false;
+    std::string result;
     try {
         jinja::runtime runtime(ctx);
-        runtime.execute(prog);
+        auto results = runtime.execute(prog);
+        auto parts = jinja::runtime::gather_string_parts(results);
+        result = parts->as_string().str();
         success = true;
     } catch (const std::exception & e) {
         JJ_DEBUG("Exception during execution: %s", e.what());
+        result = "";
         // ignore exceptions during capability analysis
     }
 
@@ -70,6 +75,7 @@ std::map<std::string, bool> caps::to_map() const {
         {"supports_parallel_tool_calls", supports_parallel_tool_calls},
         {"supports_system_role", supports_system_role},
         {"supports_preserve_reasoning", supports_preserve_reasoning},
+        {"supports_object_arguments", supports_object_arguments},
     };
 }
 
@@ -90,6 +96,8 @@ caps caps_get(jinja::program & prog) {
         return v->stats.ops.find(op_name) != v->stats.ops.end();
     };
 
+    JJ_DEBUG("%s\n", ">>> Running capability check: typed content");
+
     // case: typed content support
     caps_try_execute(
         prog,
@@ -120,6 +128,7 @@ caps caps_get(jinja::program & prog) {
         }
     );
 
+    JJ_DEBUG("%s\n", ">>> Running capability check: system prompt");
 
     // case: system prompt support
     caps_try_execute(
@@ -150,7 +159,9 @@ caps caps_get(jinja::program & prog) {
         }
     );
 
-    // case: tools support
+    JJ_DEBUG("%s\n", ">>> Running capability check: single tool with object arguments support");
+
+    // case: tools support: single call with object arguments
     caps_try_execute(
         prog,
         [&]() {
@@ -162,10 +173,10 @@ caps caps_get(jinja::program & prog) {
                 },
                 {
                     {"role", "assistant"},
-                    {"content", "Assistant message"},
+                    {"content", ""}, // Some templates expect content to be empty with tool calls
                     {"tool_calls", json::array({
                         {
-                            {"id", "call1"},
+                            {"id", "call00001"},
                             {"type", "function"},
                             {"function", {
                                 {"name", "tool1"},
@@ -173,19 +184,18 @@ caps caps_get(jinja::program & prog) {
                                     {"arg", "value"}
                                 }}
                             }}
-                        },
-                        {
-                            {"id", "call2"},
-                            {"type", "function"},
-                            {"function", {
-                                {"name", "tool2"},
-                                {"arguments", {
-                                    {"arg", "value"}
-                                }}
-                            }}
                         }
                     })}
                 },
+                {
+                    {"role", "tool"},
+                    {"content", "Tool response"},
+                    {"tool_call_id", "call00001"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "The tool response was 'tool response'"}
+                },
                 {
                     {"role", "user"},
                     {"content", "User message"},
@@ -199,7 +209,7 @@ caps caps_get(jinja::program & prog) {
                     {"name", "tool"},
                     {"type", "function"},
                     {"function", {
-                        {"name", "tool"},
+                        {"name", "tool1"},
                         {"description", "Tool description"},
                         {"parameters", {
                             {"type", "object"},
@@ -217,13 +227,12 @@ caps caps_get(jinja::program & prog) {
         },
         [&](bool success, value & messages, value & tools) {
             if (!success) {
-                result.supports_tool_calls = false;
-                result.supports_tools = false;
-                return;
+                return; // Nothing can be inferred
             }
 
             auto & tool_name = tools->at(0)->at("function")->at("name");
             caps_print_stats(tool_name, "tools[0].function.name");
+            caps_print_stats(tools, "tools");
             if (!tool_name->stats.used) {
                 result.supports_tools = false;
             }
@@ -232,8 +241,192 @@ caps caps_get(jinja::program & prog) {
             caps_print_stats(tool_calls, "messages[1].tool_calls");
             if (!tool_calls->stats.used) {
                 result.supports_tool_calls = false;
+                return;
             }
 
+            auto & tool_arg = tool_calls->at(0)->at("function")->at("arguments")->at("arg");
+            caps_print_stats(tool_arg, "messages[1].tool_calls[0].function.arguments.arg");
+            if (tool_arg->stats.used) {
+                result.supports_object_arguments = true;
+            }
+        }
+    );
+
+    if (!result.supports_object_arguments) {
+        JJ_DEBUG("%s\n", ">>> Running capability check: single tool with string arguments support");
+
+        // case: tools support: single call with string arguments
+        caps_try_execute(
+            prog,
+            [&]() {
+                // messages
+                return json::array({
+                    {
+                        {"role", "user"},
+                        {"content", "User message"},
+                    },
+                    {
+                        {"role", "assistant"},
+                        {"content", ""}, // Some templates expect content to be empty with tool calls
+                        {"tool_calls", json::array({
+                            {
+                                {"id", "call00001"},
+                                {"type", "function"},
+                                {"function", {
+                                    {"name", "tool1"},
+                                    {"arguments", R"({"arg": "value"})"}
+                                }}
+                            }
+                        })}
+                    },
+                    {
+                        {"role", "tool"},
+                        {"content", "Tool response"},
+                        {"tool_call_id", "call00001"}
+                    },
+                    {
+                        {"role", "assistant"},
+                        {"content", "The tool response was 'tool response'"}
+                    },
+                    {
+                        {"role", "user"},
+                        {"content", "User message"},
+                    },
+                });
+            },
+            [&]() {
+                // tools
+                return json::array({
+                    {
+                        {"name", "tool"},
+                        {"type", "function"},
+                        {"function", {
+                            {"name", "tool1"},
+                            {"description", "Tool description"},
+                            {"parameters", {
+                                {"type", "object"},
+                                {"properties", {
+                                    {"arg", {
+                                        {"type", "string"},
+                                        {"description", "Arg description"},
+                                    }},
+                                }},
+                                {"required", json::array({ "arg" })},
+                            }},
+                        }},
+                    },
+                });
+            },
+            [&](bool success, value & messages, value & tools) {
+                if (!success) {
+                    result.supports_tool_calls = false;
+                    result.supports_tools = false;
+                    return;
+                }
+
+                auto & tool_name = tools->at(0)->at("function")->at("name");
+                caps_print_stats(tool_name, "tools[0].function.name");
+                caps_print_stats(tools, "tools");
+                if (!tool_name->stats.used) {
+                    result.supports_tools = false;
+                }
+
+                auto & tool_calls = messages->at(1)->at("tool_calls");
+                caps_print_stats(tool_calls, "messages[1].tool_calls");
+                if (!tool_calls->stats.used) {
+                    result.supports_tool_calls = false;
+                    return;
+                }
+            }
+        );
+    }
+
+    JJ_DEBUG("%s\n", ">>> Running capability check: parallel tool support");
+
+    // case: tools support: parallel calls
+    caps_try_execute(
+        prog,
+        [&]() {
+            json args = json(R"({"arg": "value"})");
+            if (result.supports_object_arguments) {
+                args = json{{"arg", "value"}};
+            }
+
+            // messages
+            return json::array({
+                {
+                    {"role", "user"},
+                    {"content", "User message"},
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", ""}, // Some templates expect content to be empty with tool calls
+                    {"tool_calls", json::array({
+                        {
+                            {"id", "call00001"},
+                            {"type", "function"},
+                            {"function", {
+                                {"name", "tool1"},
+                                {"arguments", args}
+                            }}
+                        },
+                        {
+                            {"id", "call00002"},
+                            {"type", "function"},
+                            {"function", {
+                                {"name", "tool1"},
+                                {"arguments", args}
+                            }}
+                        }
+                    })}
+                },
+                {
+                    {"role", "tool"},
+                    {"content", "Tool response"},
+                    {"tool_call_id", "call00001"}
+                },
+                {
+                    {"role", "assistant"},
+                    {"content", "The tool response was 'tool response'"}
+                },
+                {
+                    {"role", "user"},
+                    {"content", "User message"},
+                },
+            });
+        },
+        [&]() {
+            // tools
+            return json::array({
+                {
+                    {"name", "tool"},
+                    {"type", "function"},
+                    {"function", {
+                        {"name", "tool1"},
+                        {"description", "Tool description"},
+                        {"parameters", {
+                            {"type", "object"},
+                            {"properties", {
+                                {"arg", {
+                                    {"type", "string"},
+                                    {"description", "Arg description"},
+                                }},
+                            }},
+                            {"required", json::array({ "arg" })},
+                        }},
+                    }},
+                },
+            });
+        },
+        [&](bool success, value & messages, value & /*tools*/) {
+            if (!success) {
+                result.supports_parallel_tool_calls = false;
+                return;
+            }
+
+            auto & tool_calls = messages->at(1)->at("tool_calls");
+            caps_print_stats(tool_calls, "messages[1].tool_calls");
+
             // check for second tool call usage
             auto & tool_call_1 = tool_calls->at(1)->at("function");
             caps_print_stats(tool_call_1, "messages[1].tool_calls[1].function");
@@ -243,6 +436,8 @@ caps caps_get(jinja::program & prog) {
         }
     );
 
+    JJ_DEBUG("%s\n", ">>> Running capability check: preserve reasoning");
+
     // case: preserve reasoning content in chat history
     caps_try_execute(
         prog,
diff --git a/common/jinja/caps.h b/common/jinja/caps.h
index e694e7bfaa..93a7fe0926 100644
--- a/common/jinja/caps.h
+++ b/common/jinja/caps.h
@@ -18,6 +18,8 @@ struct caps {
     bool supports_string_content = true;
     bool supports_typed_content = false;
 
+    bool supports_object_arguments = false;
+
     // for reporting on server
     std::map<std::string, bool> to_map() const;
 
diff --git a/common/jinja/parser.cpp b/common/jinja/parser.cpp
index 7970336ac0..2b25654a7a 100644
--- a/common/jinja/parser.cpp
+++ b/common/jinja/parser.cpp
@@ -53,6 +53,13 @@ class parser {
         return tokens[current + offset];
     }
 
+    const token & next() {
+        if (current >= tokens.size()) {
+            throw parser_exception("Parser Error: Unexpected EOF", source, tokens.empty() ? 0 : tokens.back().pos);
+        }
+        return tokens[current++];
+    }
+
     token expect(token::type type, const std::string&  error) {
         const auto & t = peek();
         if (t.t != type) {
@@ -90,9 +97,9 @@ class parser {
         size_t start_pos = current;
         switch (peek().t) {
             case token::comment:
-                return mk_stmt<comment_statement>(start_pos, tokens[current++].value);
+                return mk_stmt<comment_statement>(start_pos, next().value);
             case token::text:
-                return mk_stmt<string_literal>(start_pos, tokens[current++].value);
+                return mk_stmt<string_literal>(start_pos, next().value);
             case token::open_statement:
                 return parse_jinja_statement();
             case token::open_expression:
@@ -119,8 +126,7 @@ class parser {
         }
 
         size_t start_pos = current;
-        std::string name = peek().value;
-        current++; // consume identifier
+        std::string name = next().value;
 
         statement_ptr result;
         if (name == "set") {
@@ -202,7 +208,7 @@ class parser {
             // Ignore generation blocks (transformers-specific)
             // See https://github.com/huggingface/transformers/pull/30650 for more information.
             result = mk_stmt<noop_statement>(start_pos);
-            current++;
+            ++current;
 
         } else {
             throw std::runtime_error("Unknown statement: " + name);
@@ -217,7 +223,7 @@ class parser {
         statements body;
 
         if (is(token::equals)) {
-            current++;
+            ++current;
             value = parse_expression_sequence();
         } else {
             // parsing multiline set here
@@ -280,7 +286,7 @@ class parser {
         exprs.push_back(primary ? parse_primary_expression() : parse_expression());
         bool is_tuple = is(token::comma);
         while (is(token::comma)) {
-            current++; // consume comma
+            ++current; // consume comma
             exprs.push_back(primary ? parse_primary_expression() : parse_expression());
         }
         return is_tuple ? mk_stmt<tuple_literal>(start_pos, std::move(exprs)) : std::move(exprs[0]);
@@ -290,7 +296,7 @@ class parser {
         // e.g., `message` in `for message in messages`
         auto loop_var = parse_expression_sequence(true); // should be an identifier/tuple
         if (!is_identifier("in")) throw std::runtime_error("Expected 'in'");
-        current++;
+        ++current; // consume 'in'
 
         // `messages` in `for message in messages`
         auto iterable = parse_expression();
@@ -305,7 +311,8 @@ class parser {
         }
 
         if (is_statement({"else"})) {
-            current += 2;
+            ++current; // consume {%
+            ++current; // consume 'else'
             expect(token::close_statement, "Expected %}");
             while (!is_statement({"endfor"})) {
                 alternate.push_back(parse_any());
@@ -347,7 +354,7 @@ class parser {
         auto left = parse_logical_and_expression();
         while (is_identifier("or")) {
             size_t start_pos = current;
-            token op = tokens[current++];
+            token op = next();
             left = mk_stmt<binary_expression>(start_pos, op, std::move(left), parse_logical_and_expression());
         }
         return left;
@@ -357,7 +364,7 @@ class parser {
         auto left = parse_logical_negation_expression();
         while (is_identifier("and")) {
             size_t start_pos = current;
-            auto op = tokens[current++];
+            auto op = next();
             left = mk_stmt<binary_expression>(start_pos, op, std::move(left), parse_logical_negation_expression());
         }
         return left;
@@ -367,7 +374,7 @@ class parser {
         // Try parse unary operators
         if (is_identifier("not")) {
             size_t start_pos = current;
-            auto op = tokens[current++];
+            auto op = next();
             return mk_stmt<unary_expression>(start_pos, op, parse_logical_negation_expression());
         }
         return parse_comparison_expression();
@@ -382,11 +389,12 @@ class parser {
             size_t start_pos = current;
             if (is_identifier("not") && peek(1).t == token::identifier && peek(1).value == "in") {
                 op = {token::identifier, "not in", tokens[current].pos};
-                current += 2;
+                ++current; // consume 'not'
+                ++current; // consume 'in'
             } else if (is_identifier("in")) {
-                op = tokens[current++];
+                op = next();
             } else if (is(token::comparison_binary_operator)) {
-                op = tokens[current++];
+                op = next();
             } else break;
             left = mk_stmt<binary_expression>(start_pos, op, std::move(left), parse_additive_expression());
         }
@@ -397,7 +405,7 @@ class parser {
         auto left = parse_multiplicative_expression();
         while (is(token::additive_binary_operator)) {
             size_t start_pos = current;
-            auto op = tokens[current++];
+            auto op = next();
             left = mk_stmt<binary_expression>(start_pos, op, std::move(left), parse_multiplicative_expression());
         }
         return left;
@@ -407,7 +415,7 @@ class parser {
         auto left = parse_test_expression();
         while (is(token::multiplicative_binary_operator)) {
             size_t start_pos = current;
-            auto op = tokens[current++];
+            auto op = next();
             left = mk_stmt<binary_expression>(start_pos, op, std::move(left), parse_test_expression());
         }
         return left;
@@ -417,9 +425,9 @@ class parser {
         auto operand = parse_filter_expression();
         while (is_identifier("is")) {
             size_t start_pos = current;
-            current++;
+            ++current; // consume 'is'
             bool negate = false;
-            if (is_identifier("not")) { current++; negate = true; }
+            if (is_identifier("not")) { ++current; negate = true; }
             auto test_id = parse_primary_expression();
             // FIXME: tests can also be expressed like this: if x is eq 3
             if (is(token::open_paren)) test_id = parse_call_expression(std::move(test_id));
@@ -432,7 +440,7 @@ class parser {
         auto operand = parse_call_member_expression();
         while (is(token::pipe)) {
             size_t start_pos = current;
-            current++;
+            ++current; // consume pipe
             auto filter = parse_primary_expression();
             if (is(token::open_paren)) filter = parse_call_expression(std::move(filter));
             operand = mk_stmt<filter_expression>(start_pos, std::move(operand), std::move(filter));
@@ -490,7 +498,7 @@ class parser {
     statement_ptr parse_member_expression(statement_ptr object) {
         size_t start_pos = current;
         while (is(token::dot) || is(token::open_square_bracket)) {
-            auto op = tokens[current++];
+            auto op = next();
             bool computed = op.t == token::open_square_bracket;
             statement_ptr prop;
             if (computed) {
@@ -531,12 +539,15 @@ class parser {
             statement_ptr step = slices.size() > 2 ? std::move(slices[2]) : nullptr;
             return mk_stmt<slice_expression>(start_pos, std::move(start), std::move(stop), std::move(step));
         }
+        if (slices.empty()) {
+            return mk_stmt<blank_expression>(start_pos);
+        }
         return std::move(slices[0]);
     }
 
     statement_ptr parse_primary_expression() {
         size_t start_pos = current;
-        auto t = tokens[current++];
+        auto t = next();
         switch (t.t) {
             case token::numeric_literal:
                 if (t.value.find('.') != std::string::npos) {
@@ -547,7 +558,7 @@ class parser {
             case token::string_literal: {
                 std::string val = t.value;
                 while (is(token::string_literal)) {
-                    val += tokens[current++].value;
+                    val += next().value;
                 }
                 return mk_stmt<string_literal>(start_pos, val);
             }
@@ -562,9 +573,9 @@ class parser {
                 statements vals;
                 while (!is(token::close_square_bracket)) {
                     vals.push_back(parse_expression());
-                    if (is(token::comma)) current++;
+                    if (is(token::comma)) ++current;
                 }
-                current++;
+                ++current;
                 return mk_stmt<array_literal>(start_pos, std::move(vals));
             }
             case token::open_curly_bracket: {
@@ -573,9 +584,9 @@ class parser {
                     auto key = parse_expression();
                     expect(token::colon, "Expected :");
                     pairs.push_back({std::move(key), parse_expression()});
-                    if (is(token::comma)) current++;
+                    if (is(token::comma)) ++current;
                 }
-                current++;
+                ++current;
                 return mk_stmt<object_literal>(start_pos, std::move(pairs));
             }
             default:
diff --git a/common/jinja/runtime.cpp b/common/jinja/runtime.cpp
index 5757c76b7a..f81d98d954 100644
--- a/common/jinja/runtime.cpp
+++ b/common/jinja/runtime.cpp
@@ -114,8 +114,10 @@ value binary_expression::execute_impl(context & ctx) {
 
     // Logical operators
     if (op.value == "and") {
+        JJ_DEBUG("Executing logical test: %s AND %s", left->type().c_str(), right->type().c_str());
         return left_val->as_bool() ? right->execute(ctx) : std::move(left_val);
     } else if (op.value == "or") {
+        JJ_DEBUG("Executing logical test: %s OR %s", left->type().c_str(), right->type().c_str());
         return left_val->as_bool() ? std::move(left_val) : right->execute(ctx);
     }
 
@@ -249,6 +251,23 @@ value binary_expression::execute_impl(context & ctx) {
         return res;
     }
 
+    // Python-style string repetition
+    // TODO: support array/tuple repetition (e.g., [1, 2] * 3 → [1, 2, 1, 2, 1, 2])
+    if (op.value == "*" &&
+            ((is_val<value_string>(left_val) && is_val<value_int>(right_val)) ||
+             (is_val<value_int>(left_val) && is_val<value_string>(right_val)))) {
+        const auto & str = is_val<value_string>(left_val) ? left_val->as_string() : right_val->as_string();
+        const int64_t repeat = is_val<value_int>(right_val) ? right_val->as_int() : left_val->as_int();
+        auto res = mk_val<value_string>();
+        if (repeat <= 0) {
+            return res;
+        }
+        for (int64_t i = 0; i < repeat; ++i) {
+            res->val_str = res->val_str.append(str);
+        }
+        return res;
+    }
+
     // String membership
     if (is_val<value_string>(left_val) && is_val<value_string>(right_val)) {
         // case: "a" in "abc"
@@ -304,6 +323,19 @@ value filter_expression::execute_impl(context & ctx) {
             filter_id = "strip"; // alias
         }
         JJ_DEBUG("Applying filter '%s' to %s", filter_id.c_str(), input->type().c_str());
+        // TODO: Refactor filters so this coercion can be done automatically
+        if (!input->is_undefined() && !is_val<value_string>(input) && (
+            filter_id == "capitalize" ||
+            filter_id == "lower" ||
+            filter_id == "replace" ||
+            filter_id == "strip" ||
+            filter_id == "title" ||
+            filter_id == "upper" ||
+            filter_id == "wordcount"
+        )) {
+            JJ_DEBUG("Coercing %s to String for '%s' filter", input->type().c_str(), filter_id.c_str());
+            input = mk_val<value_string>(input->as_string());
+        }
         return try_builtin_func(ctx, filter_id, input)->invoke(func_args(ctx));
 
     } else if (is_stmt<call_expression>(filter)) {
@@ -665,8 +697,9 @@ value macro_statement::execute_impl(context & ctx) {
                 if (is_stmt<identifier>(this->args[i])) {
                     // normal parameter
                     std::string param_name = cast_stmt<identifier>(this->args[i])->val;
-                    JJ_DEBUG("  Binding parameter '%s' to argument of type %s", param_name.c_str(), args.get_pos(i)->type().c_str());
-                    macro_ctx.set_val(param_name, args.get_pos(i));
+                    value param_value = args.get_kwarg_or_pos(param_name, i);
+                    JJ_DEBUG("  Binding parameter '%s' to argument of type %s", param_name.c_str(), param_value->type().c_str());
+                    macro_ctx.set_val(param_name, param_value);
                 } else if (is_stmt<keyword_argument_expression>(this->args[i])) {
                     // default argument used as normal parameter
                     auto kwarg = cast_stmt<keyword_argument_expression>(this->args[i]);
@@ -674,8 +707,9 @@ value macro_statement::execute_impl(context & ctx) {
                         throw std::runtime_error("Keyword argument key must be an identifier in macro '" + name + "'");
                     }
                     std::string param_name = cast_stmt<identifier>(kwarg->key)->val;
-                    JJ_DEBUG("  Binding parameter '%s' to argument of type %s", param_name.c_str(), args.get_pos(i)->type().c_str());
-                    macro_ctx.set_val(param_name, args.get_pos(i));
+                    value param_value = args.get_kwarg_or_pos(param_name, i);
+                    JJ_DEBUG("  Binding parameter '%s' to argument of type %s", param_name.c_str(), param_value->type().c_str());
+                    macro_ctx.set_val(param_name, param_value);
                 } else {
                     throw std::runtime_error("Invalid parameter type in macro '" + name + "'");
                 }
@@ -767,10 +801,15 @@ value member_expression::execute_impl(context & ctx) {
     }
 
     JJ_DEBUG("Member expression on object type %s, property type %s", object->type().c_str(), property->type().c_str());
-    ensure_key_type_allowed(property);
-
     value val = mk_val<value_undefined>("object_property");
 
+    if (property->is_undefined()) {
+        JJ_DEBUG("%s", "Member expression property is undefined, returning undefined");
+        return val;
+    }
+
+    ensure_key_type_allowed(property);
+
     if (is_val<value_undefined>(object)) {
         JJ_DEBUG("%s", "Accessing property on undefined object, returning undefined");
         return val;
@@ -838,7 +877,7 @@ value call_expression::execute_impl(context & ctx) {
     for (auto & arg_stmt : this->args) {
         auto arg_val = arg_stmt->execute(ctx);
         JJ_DEBUG("  Argument type: %s", arg_val->type().c_str());
-        args.push_back(std::move(arg_val));
+        args.push_back(arg_val);
     }
     // execute callee
     value callee_val = callee->execute(ctx);
diff --git a/common/jinja/runtime.h b/common/jinja/runtime.h
index 17a6dff5aa..3ca5f1754f 100644
--- a/common/jinja/runtime.h
+++ b/common/jinja/runtime.h
@@ -263,6 +263,14 @@ struct comment_statement : public statement {
 
 // Expressions
 
+// Represents an omitted expression in a computed member, e.g. `a[]`.
+struct blank_expression : public expression {
+    std::string type() const override { return "BlankExpression"; }
+    value execute_impl(context &) override {
+        return mk_val<value_undefined>();
+    }
+};
+
 struct member_expression : public expression {
     statement_ptr object;
     statement_ptr property;
diff --git a/common/jinja/value.cpp b/common/jinja/value.cpp
index 749113124b..8e86a715f5 100644
--- a/common/jinja/value.cpp
+++ b/common/jinja/value.cpp
@@ -1,4 +1,5 @@
 #include "runtime.h"
+#include "unicode.h"
 #include "value.h"
 
 // for converting from JSON to jinja values
@@ -154,6 +155,83 @@ static value test_compare_fn(const func_args & args) {
     return mk_val<value_bool>(value_compare(args.get_pos(0), args.get_pos(1), op));
 }
 
+static void append_codepoint_as_ascii_json_escape(std::string & out, uint32_t codepoint) {
+    auto append_u16 = [&out](uint32_t value) {
+        char buf[8];
+        snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned int>(value));
+        out += buf;
+    };
+
+    if (codepoint <= 0xFFFF) {
+        append_u16(codepoint);
+        return;
+    }
+
+    codepoint -= 0x10000;
+    append_u16(0xD800 + ((codepoint >> 10) & 0x3FF));
+    append_u16(0xDC00 + (codepoint & 0x3FF));
+}
+
+static std::string json_ensure_ascii_preserving_format(const std::string & json_str) {
+    std::string output;
+    output.reserve(json_str.size());
+
+    bool in_string = false;
+    bool escaped = false;
+
+    for (size_t pos = 0; pos < json_str.size();) {
+        const char ch = json_str[pos];
+        if (!in_string) {
+            output.push_back(ch);
+            if (ch == '"') {
+                in_string = true;
+            }
+            ++pos;
+            continue;
+        }
+
+        if (escaped) {
+            output.push_back(ch);
+            escaped = false;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '\\') {
+            output.push_back(ch);
+            escaped = true;
+            ++pos;
+            continue;
+        }
+
+        if (ch == '"') {
+            output.push_back(ch);
+            in_string = false;
+            ++pos;
+            continue;
+        }
+
+        const unsigned char uch = static_cast<unsigned char>(ch);
+        if (uch < 0x80) {
+            output.push_back(ch);
+            ++pos;
+            continue;
+        }
+
+        auto parsed = common_parse_utf8_codepoint(json_str, pos);
+        if (parsed.status != utf8_parse_result::SUCCESS) {
+            output += "\\ufffd";
+            ++pos;
+            continue;
+        }
+
+        append_codepoint_as_ascii_json_escape(output, parsed.codepoint);
+        pos += parsed.bytes_consumed;
+    }
+
+    return output;
+}
+
 static value tojson(const func_args & args) {
     args.ensure_count(1, 5);
     value val_ascii      = args.get_kwarg_or_pos("ensure_ascii", 1);
@@ -169,16 +247,17 @@ static value tojson(const func_args & args) {
     if (is_val<value_int>(val_indent)) {
         indent = static_cast<int>(val_indent->as_int());
     }
-    if (val_ascii->as_bool()) { // undefined == false
-        throw not_implemented_exception("tojson ensure_ascii=true not implemented");
-    }
     if (val_sort->as_bool()) { // undefined == false
         throw not_implemented_exception("tojson sort_keys=true not implemented");
     }
+    const bool ensure_ascii = val_ascii->as_bool(); // undefined == false
     auto separators = (is_val<value_array>(val_separators) ? val_separators : mk_val<value_array>())->as_array();
     std::string item_sep = separators.size() > 0 ? separators[0]->as_string().str() : (indent < 0 ? ", " : ",");
     std::string key_sep = separators.size() > 1 ? separators[1]->as_string().str() : ": ";
     std::string json_str = value_to_json(args.get_pos(0), indent, item_sep, key_sep);
+    if (ensure_ascii) {
+        json_str = json_ensure_ascii_preserving_format(json_str);
+    }
     return mk_val<value_string>(json_str);
 }
 
@@ -460,13 +539,18 @@ const func_builtins & value_int_t::get_builtins() const {
             int64_t val = args.get_pos(0)->as_int();
             return mk_val<value_int>(val < 0 ? -val : val);
         }},
+        {"int", [](const func_args & args) -> value {
+            args.ensure_vals<value_int>();
+            return mk_val<value_int>(args.get_pos(0)->as_int());
+        }},
         {"float", [](const func_args & args) -> value {
             args.ensure_vals<value_int>();
             double val = static_cast<double>(args.get_pos(0)->as_int());
             return mk_val<value_float>(val);
         }},
-        {"tojson", tojson},
+        {"safe", tojson},
         {"string", tojson},
+        {"tojson", tojson},
     };
     return builtins;
 }
@@ -485,8 +569,13 @@ const func_builtins & value_float_t::get_builtins() const {
             int64_t val = static_cast<int64_t>(args.get_pos(0)->as_float());
             return mk_val<value_int>(val);
         }},
-        {"tojson", tojson},
+        {"float", [](const func_args & args) -> value {
+            args.ensure_vals<value_float>();
+            return mk_val<value_float>(args.get_pos(0)->as_float());
+        }},
+        {"safe", tojson},
         {"string", tojson},
+        {"tojson", tojson},
     };
     return builtins;
 }
@@ -771,6 +860,11 @@ const func_builtins & value_string_t::get_builtins() const {
 
 
 const func_builtins & value_bool_t::get_builtins() const {
+    static const func_handler tostring = [](const func_args & args) -> value {
+        args.ensure_vals<value_bool>();
+        bool val = args.get_pos(0)->as_bool();
+        return mk_val<value_string>(val ? "True" : "False");
+    };
     static const func_builtins builtins = {
         {"default", default_value},
         {"int", [](const func_args & args) -> value {
@@ -783,11 +877,8 @@ const func_builtins & value_bool_t::get_builtins() const {
             bool val = args.get_pos(0)->as_bool();
             return mk_val<value_float>(val ? 1.0 : 0.0);
         }},
-        {"string", [](const func_args & args) -> value {
-            args.ensure_vals<value_bool>();
-            bool val = args.get_pos(0)->as_bool();
-            return mk_val<value_string>(val ? "True" : "False");
-        }},
+        {"safe", tostring},
+        {"string", tostring},
         {"tojson", tojson},
     };
     return builtins;
@@ -1100,18 +1191,14 @@ const func_builtins & value_object_t::get_builtins() const {
 }
 
 const func_builtins & value_none_t::get_builtins() const {
+    static const func_handler tostring = [](const func_args &) -> value {
+        return mk_val<value_string>("None");
+    };
     static const func_builtins builtins = {
         {"default", default_value},
         {"tojson", tojson},
-        {"string", [](const func_args &) -> value {
-            return mk_val<value_string>("None");
-        }},
-        {"safe", [](const func_args &) -> value {
-            return mk_val<value_string>("None");
-        }},
-        {"strip", [](const func_args &) -> value {
-            return mk_val<value_string>("None");
-        }},
+        {"string", tostring},
+        {"safe", tostring},
         {"items", empty_value_fn<value_array>},
         {"map", empty_value_fn<value_array>},
         {"reject", empty_value_fn<value_array>},
diff --git a/common/jinja/value.h b/common/jinja/value.h
index 07e447ff69..7d164588ad 100644
--- a/common/jinja/value.h
+++ b/common/jinja/value.h
@@ -12,8 +12,8 @@
 #include <set>
 #include <sstream>
 #include <string>
-#include <unordered_map>
 #include <vector>
+#include <unordered_map>
 
 namespace jinja {
 
@@ -451,7 +451,7 @@ struct value_array_t : public value_t {
     }
 protected:
     virtual bool equivalent(const value_t & other) const override {
-        return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_arr.begin(), val_arr.end(), other.val_arr.begin(), value_equivalence());
+        return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_arr.begin(), val_arr.end(), other.val_arr.begin(), other.val_arr.end(), value_equivalence());
     }
 };
 using value_array = std::shared_ptr<value_array_t>;
@@ -587,7 +587,7 @@ struct value_object_t : public value_t {
     }
 protected:
     virtual bool equivalent(const value_t & other) const override {
-        return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_obj.begin(), val_obj.end(), other.val_obj.begin(), value_equivalence());
+        return typeid(*this) == typeid(other) && is_hashable() && other.is_hashable() && std::equal(val_obj.begin(), val_obj.end(), other.val_obj.begin(), other.val_obj.end(), value_equivalence());
     }
 };
 using value_object = std::shared_ptr<value_object_t>;
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp
index 5be20c4ad3..4028420cf1 100644
--- a/common/json-schema-to-grammar.cpp
+++ b/common/json-schema-to-grammar.cpp
@@ -30,11 +30,11 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     if (separator_rule.empty()) {
         if (min_items == 1 && !has_max) {
             return item_rule + "+";
-        } else if (min_items == 0 && !has_max) {
+        }
+        if (min_items == 0 && !has_max) {
             return item_rule + "*";
-        } else {
-            return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
         }
+        return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
     }
 
     auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
@@ -44,7 +44,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items
     return result;
 }
 
-static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
+static void build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
     auto has_min = min_value != std::numeric_limits<int64_t>::min();
     auto has_max = max_value != std::numeric_limits<int64_t>::max();
 
@@ -131,14 +131,14 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
     if (has_min && has_max) {
         if (min_value < 0 && max_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
             out << ")";
             return;
         }
 
         if (min_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
             out << ") | ";
             min_value = 0;
         }
@@ -162,7 +162,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
     if (has_min) {
         if (min_value < 0) {
             out << "\"-\" (";
-            _build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
+            build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
             out << ") | [0] | [1-9] ";
             more_digits(0, decimals_left - 1);
         } else if (min_value == 0) {
@@ -197,7 +197,7 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
             }
             digit_range(c, c);
             out << " (";
-            _build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
+            build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
             out << ")";
             if (c < '9') {
                 out << " | ";
@@ -216,10 +216,10 @@ static void _build_min_max_int(int64_t min_value, int64_t max_value, std::string
                 more_digits(0, less_decimals);
                 out << " | ";
             }
-            _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
+            build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
         } else {
             out << "\"-\" (";
-            _build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
+            build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
             out << ")";
         }
         return;
@@ -235,7 +235,7 @@ struct BuiltinRule {
     std::vector<std::string> deps;
 };
 
-std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+static std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
     {"boolean", {"(\"true\" | \"false\") space", {}}},
     {"decimal-part", {"[0-9]{1,16}", {}}},
     {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
@@ -250,7 +250,7 @@ std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
     {"null", {"\"null\" space", {}}},
 };
 
-std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+static std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
     {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
     {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
     {"date-time", {"date \"T\" time", {"date", "time"}}},
@@ -263,22 +263,26 @@ static bool is_reserved_name(const std::string & name) {
     static const std::unordered_set<std::string> RESERVED_NAMES = [] {
         std::unordered_set<std::string> s;
         s.insert("root");
-        for (const auto & p : PRIMITIVE_RULES) s.insert(p.first);
-        for (const auto & p : STRING_FORMAT_RULES) s.insert(p.first);
+        for (const auto & p : PRIMITIVE_RULES) {
+            s.insert(p.first);
+        }
+        for (const auto & p : STRING_FORMAT_RULES) {
+            s.insert(p.first);
+        }
         return s;
     }();
     return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
 }
 
-std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
-std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
-std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
-std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
+static std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+");
+static std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"\\\\]");
+static std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]");
+static std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
     {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"}, {'\\', "\\\\"}
 };
 
-std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
-std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
+static std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
 
 template <typename Iterator>
 std::string join(Iterator begin, Iterator end, const std::string & separator) {
@@ -368,19 +372,19 @@ class common_schema_converter {
         if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) {
             _rules[esc_name] = rule;
             return esc_name;
-        } else {
-            int i = 0;
-            while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
-                i++;
-            }
-            std::string key = esc_name + std::to_string(i);
-            _rules[key] = rule;
-            return key;
         }
+        int i = 0;
+        while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) {
+            i++;
+        }
+        std::string key = esc_name + std::to_string(i);
+        _rules[key] = rule;
+        return key;
     }
 
     std::string _generate_union_rule(const std::string & name, const std::vector<json> & alt_schemas) {
         std::vector<std::string> rules;
+        rules.reserve(alt_schemas.size());
         for (size_t i = 0; i < alt_schemas.size(); i++) {
             rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
         }
@@ -444,6 +448,7 @@ class common_schema_converter {
                 flush_literal();
 
                 std::vector<std::string> results;
+                results.reserve(ret.size());
                 for (const auto & item : ret) {
                     results.push_back(to_rule(item));
                 }
@@ -457,15 +462,30 @@ class common_schema_converter {
                     i++;
                 } else if (c == '(') {
                     i++;
-                    if (i < length) {
-                        if (sub_pattern[i] == '?') {
+                    if (i < length && sub_pattern[i] == '?') {
+                        if (i + 1 < length && sub_pattern[i + 1] == ':') {
+                            i += 2; // skip "?:" for non-capturing group, treat as regular group
+                        } else {
+                            // lookahead/lookbehind (?=, ?!, ?<=, ?<!) - not supported
                             _warnings.push_back("Unsupported pattern syntax");
+                            // skip to matching ')' to avoid UB on empty seq
+                            int depth = 1;
+                            while (i < length && depth > 0) {
+                                if (sub_pattern[i] == '\\' && i + 1 < length) {
+                                    i += 2; // skip escaped character
+                                } else {
+                                    if (sub_pattern[i] == '(') depth++;
+                                    else if (sub_pattern[i] == ')') depth--;
+                                    i++;
+                                }
+                            }
+                            continue;
                         }
                     }
                     seq.emplace_back("(" + to_rule(transform()) + ")", false);
                 } else if (c == ')') {
                     i++;
-                    if (start > 0 && sub_pattern[start - 1] != '(') {
+                    if (start > 0 && sub_pattern[start - 1] != '(' && (start < 2 || sub_pattern[start - 2] != '?' || sub_pattern[start - 1] != ':')) {
                         _errors.push_back("Unbalanced parentheses");
                     }
                     return join_seq();
@@ -597,7 +617,7 @@ class common_schema_converter {
             TrieNode() : is_end_of_string(false) {}
 
             void insert(const std::string & string) {
-                auto node = this;
+                auto *node = this;
                 for (char c : string) {
                     node = &node->children[c];
                 }
@@ -722,7 +742,7 @@ class common_schema_converter {
                 if (ks.empty()) {
                     return res;
                 }
-                std::string k = ks[0];
+                const std::string& k = ks[0];
                 std::string kv_rule_name = prop_kv_rule_names[k];
                 std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
                 if (first_is_optional) {
@@ -825,13 +845,13 @@ class common_schema_converter {
                         std::string pointer = ref.substr(ref.find('#') + 1);
                         std::vector<std::string> tokens = split(pointer, "/");
                         for (size_t i = 1; i < tokens.size(); ++i) {
-                            std::string sel = tokens[i];
+                            const std::string& sel = tokens[i];
                             if (target.is_object() && target.contains(sel)) {
                                 target = target[sel];
                             } else if (target.is_array()) {
                                 size_t sel_index;
                                 try {
-                                    sel_index = std::stoul(sel);
+                                    sel_index = std::stoull(sel);
                                 } catch (const std::invalid_argument & e) {
                                     sel_index = target.size();
                                 }
@@ -848,7 +868,7 @@ class common_schema_converter {
                         _refs[ref] = target;
                     }
                 } else {
-                    for (auto & kv : n.items()) {
+                    for (const auto & kv : n.items()) {
                         visit_refs(kv.value());
                     }
                 }
@@ -858,7 +878,7 @@ class common_schema_converter {
         visit_refs(schema);
     }
 
-    std::string _generate_constant_rule(const json & value) {
+    static std::string _generate_constant_rule(const json & value) {
         return format_literal(value.dump());
     }
 
@@ -869,10 +889,12 @@ class common_schema_converter {
 
         if (schema.contains("$ref")) {
             return _add_rule(rule_name, _resolve_ref(schema["$ref"]));
-        } else if (schema.contains("oneOf") || schema.contains("anyOf")) {
+        }
+        if (schema.contains("oneOf") || schema.contains("anyOf")) {
             std::vector<json> alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get<std::vector<json>>() : schema["anyOf"].get<std::vector<json>>();
             return _add_rule(rule_name, _generate_union_rule(name, alt_schemas));
-        } else if (schema_type.is_array()) {
+        }
+        if (schema_type.is_array()) {
             std::vector<json> schema_types;
             for (const auto & t : schema_type) {
                 json schema_copy(schema);
@@ -880,15 +902,18 @@ class common_schema_converter {
                 schema_types.push_back(schema_copy);
             }
             return _add_rule(rule_name, _generate_union_rule(name, schema_types));
-        } else if (schema.contains("const")) {
+        }
+        if (schema.contains("const")) {
             return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
-        } else if (schema.contains("enum")) {
+        }
+        if (schema.contains("enum")) {
             std::vector<std::string> enum_values;
             for (const auto & v : schema["enum"]) {
                 enum_values.push_back(_generate_constant_rule(v));
             }
             return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
-        } else if ((schema_type.is_null() || schema_type == "object")
+        }
+        if ((schema_type.is_null() || schema_type == "object")
                 && (schema.contains("properties") ||
                     (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
             std::unordered_set<std::string> required;
@@ -909,11 +934,12 @@ class common_schema_converter {
                 _build_object_rule(
                     properties, required, name,
                     schema.contains("additionalProperties") ? schema["additionalProperties"] : json()));
-        } else if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
+        }
+        if ((schema_type.is_null() || schema_type == "object" || schema_type == "string") && schema.contains("allOf")) {
             std::unordered_set<std::string> required;
             std::vector<std::pair<std::string, json>> properties;
             std::map<std::string, size_t> enum_values;
-            std::string hybrid_name = name;
+            const std::string& hybrid_name = name;
             std::function<void(const json &, bool)> add_component = [&](const json & comp_schema, bool is_required) {
                 if (comp_schema.contains("$ref")) {
                     add_component(_refs[comp_schema["$ref"]], is_required);
@@ -936,9 +962,9 @@ class common_schema_converter {
                   // todo warning
                 }
             };
-            for (auto & t : schema["allOf"]) {
+            for (const auto & t : schema["allOf"]) {
                 if (t.contains("anyOf")) {
-                    for (auto & tt : t["anyOf"]) {
+                    for (const auto & tt : t["anyOf"]) {
                         add_component(tt, false);
                     }
                 } else {
@@ -957,7 +983,8 @@ class common_schema_converter {
                 }
             }
             return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json()));
-        } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
+        }
+        if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) {
             json items = schema.contains("items") ? schema["items"] : schema["prefixItems"];
             if (items.is_array()) {
                 std::string rule = "\"[\" space ";
@@ -969,27 +996,31 @@ class common_schema_converter {
                 }
                 rule += " \"]\" space";
                 return _add_rule(rule_name, rule);
-            } else {
-                std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
-                int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
-                json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
-                int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
-
-                return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
             }
-        } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
+            std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
+            int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
+            json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
+            int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
+
+            return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
+        }
+        if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
             return _visit_pattern(schema["pattern"], rule_name);
-        } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
+        }
+        if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
             return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
-        } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
+        }
+        if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
             auto prim_name = schema_format + "-string";
             return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
-        } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
+        }
+        if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
             std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
             int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
             int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
             return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
-        } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
+        }
+        if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
             int64_t min_value = std::numeric_limits<int64_t>::min();
             int64_t max_value = std::numeric_limits<int64_t>::max();
             if (schema.contains("minimum")) {
@@ -1004,19 +1035,24 @@ class common_schema_converter {
             }
             std::stringstream out;
             out << "(";
-            _build_min_max_int(min_value, max_value, out);
+            build_min_max_int(min_value, max_value, out);
             out << ") space";
             return _add_rule(rule_name, out.str());
-        } else if (schema.empty() || schema_type == "object") {
+        }
+        if (schema.empty() || schema_type == "object") {
             return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
-        } else {
-            if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
-                _errors.push_back("Unrecognized schema: " + schema.dump());
-                return "";
-            }
-            // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
-            return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
         }
+        if (schema_type.is_null() && schema.is_object()) {
+            // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+            // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+            return _add_rule(rule_name, _add_primitive("value", PRIMITIVE_RULES.at("value")));
+        }
+        if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
+            _errors.push_back("Unrecognized schema: " + schema.dump());
+            return "";
+        }
+        // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
+        return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
     }
 
     void check_errors() {
@@ -1031,7 +1067,7 @@ class common_schema_converter {
     std::string format_grammar() {
         std::stringstream ss;
         for (const auto & kv : _rules) {
-            ss << kv.first << " ::= " << kv.second << std::endl;
+            ss << kv.first << " ::= " << kv.second << '\n';
         }
         return ss.str();
     }
diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp
index f2fc84500f..e37c1ce80e 100644
--- a/common/peg-parser.cpp
+++ b/common/peg-parser.cpp
@@ -1,14 +1,15 @@
-#include "common.h"
 #include "peg-parser.h"
+
+#include "common.h"
 #include "json-schema-to-grammar.h"
+#include "log.h"
 #include "unicode.h"
 
-#include <nlohmann/json.hpp>
-
 #include <algorithm>
 #include <initializer_list>
 #include <map>
 #include <memory>
+#include <nlohmann/json.hpp>
 #include <regex>
 #include <stdexcept>
 #include <unordered_set>
@@ -34,8 +35,7 @@ static bool is_hex_digit(const char c) {
 // This is used in common_peg_until_parser and to build a GBNF exclusion grammar
 struct trie {
     struct node {
-        size_t depth = 0;
-        std::map<unsigned char, size_t> children;
+        std::map<uint32_t, size_t> children;  // Use uint32_t to store Unicode codepoints
         bool is_word;
     };
 
@@ -55,15 +55,22 @@ struct trie {
         size_t current = 0; // Start at root
         size_t pos = start_pos;
 
+        // LOG_DBG("%s: checking at pos %zu, sv='%s'\n", __func__, start_pos, std::string(sv).c_str());
+
         while (pos < sv.size()) {
-            auto it = nodes[current].children.find(sv[pos]);
+            auto result = common_parse_utf8_codepoint(sv, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            auto it = nodes[current].children.find(result.codepoint);
             if (it == nodes[current].children.end()) {
                 // Can't continue matching
                 return match_result{match_result::NO_MATCH};
             }
 
             current = it->second;
-            pos++;
+            pos += result.bytes_consumed;
 
             // Check if we've matched a complete word
             if (nodes[current].is_word) {
@@ -82,22 +89,22 @@ struct trie {
     }
 
     struct prefix_and_next {
-        std::string prefix;
-        std::string next_chars;
+        std::vector<uint32_t> prefix;
+        std::vector<uint32_t> next_chars;
     };
 
     std::vector<prefix_and_next> collect_prefix_and_next() {
-        std::string prefix;
+        std::vector<uint32_t>        prefix;
         std::vector<prefix_and_next> result;
         collect_prefix_and_next(0, prefix, result);
         return result;
     }
 
   private:
-    void collect_prefix_and_next(size_t index, std::string & prefix, std::vector<prefix_and_next> & out) {
+    void collect_prefix_and_next(size_t index, std::vector<uint32_t> & prefix, std::vector<prefix_and_next> & out) {
         if (!nodes[index].is_word) {
             if (!nodes[index].children.empty()) {
-                std::string chars;
+                std::vector<uint32_t> chars;
                 chars.reserve(nodes[index].children.size());
                 for (const auto & p : nodes[index].children) {
                     chars.push_back(p.first);
@@ -107,7 +114,7 @@ struct trie {
         }
 
         for (const auto & p : nodes[index].children) {
-            unsigned char ch = p.first;
+            uint32_t ch = p.first;
             auto child = p.second;
             prefix.push_back(ch);
             collect_prefix_and_next(child, prefix, out);
@@ -123,11 +130,19 @@ struct trie {
 
     void insert(const std::string & word) {
         size_t current = 0;
-        for (unsigned char ch : word) {
+        size_t pos     = 0;
+        while (pos < word.length()) {
+            auto result = common_parse_utf8_codepoint(word, pos);
+            if (result.status != utf8_parse_result::SUCCESS) {
+                break;
+            }
+
+            uint32_t ch = result.codepoint;
+            pos += result.bytes_consumed;
+
             auto it = nodes[current].children.find(ch);
             if (it == nodes[current].children.end()) {
                 size_t child = create_node();
-                nodes[child].depth = nodes[current].depth + 1;
                 nodes[current].children[ch] = child;
                 current = child;
             } else {
@@ -241,6 +256,38 @@ static std::pair<std::vector<common_peg_chars_parser::char_range>, bool> parse_c
     return {ranges, negated};
 }
 
+common_peg_ast_id common_peg_ast_arena::find_by_tag(const common_peg_ast_node & parent, const std::string & tag, int max_depth) const {
+    for (auto child_id : parent.children) {
+        const auto & child = get(child_id);
+        if (child.tag == tag) {
+            return child_id;
+        }
+        if (max_depth > 1) {
+            auto result = find_by_tag(child, tag, max_depth - 1);
+            if (result != COMMON_PEG_INVALID_AST_ID) {
+                return result;
+            }
+        }
+    }
+    return COMMON_PEG_INVALID_AST_ID;
+}
+
+common_peg_ast_id common_peg_ast_arena::find_by_rule(const common_peg_ast_node & parent, const std::string & rule, int max_depth) const {
+    for (auto child_id : parent.children) {
+        const auto & child = get(child_id);
+        if (child.rule == rule) {
+            return child_id;
+        }
+        if (max_depth > 1) {
+            auto result = find_by_rule(child, rule, max_depth - 1);
+            if (result != COMMON_PEG_INVALID_AST_ID) {
+                return result;
+            }
+        }
+    }
+    return COMMON_PEG_INVALID_AST_ID;
+}
+
 void common_peg_ast_arena::visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const {
     if (id == COMMON_PEG_INVALID_AST_ID) {
         return;
@@ -286,6 +333,32 @@ struct parser_executor {
     parser_executor(const common_peg_arena & arena, common_peg_parse_context & ctx, size_t start)
         : arena(arena), ctx(ctx), start_pos(start) {}
 
+    std::string debug_indent() const { return std::string(ctx.parse_depth * 2, ' '); }
+
+    std::string debug_input_snippet(size_t pos, size_t len = 60) const {
+        if (pos >= ctx.input.size()) {
+            return "<EOF>";
+        }
+        auto        snippet = ctx.input.substr(pos, len);
+        // Escape newlines for display
+        std::string result;
+        for (char c : snippet) {
+            if (c == '\n') {
+                result += "\\n";
+            } else if (c == '\r') {
+                result += "\\r";
+            } else if (c == '\t') {
+                result += "\\t";
+            } else {
+                result += c;
+            }
+        }
+        if (pos + len < ctx.input.size()) {
+            result += "...";
+        }
+        return result;
+    }
+
     common_peg_parse_result operator()(const common_peg_epsilon_parser & /* p */) const {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos);
     }
@@ -308,7 +381,7 @@ struct parser_executor {
         auto pos = start_pos;
         for (auto i = 0u; i < p.literal.size(); ++i) {
             if (pos >= ctx.input.size()) {
-                if (!ctx.is_partial) {
+                if (!ctx.is_lenient()) {
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
                 }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
@@ -323,12 +396,32 @@ struct parser_executor {
     }
 
     common_peg_parse_result operator()(const common_peg_sequence_parser & p) {
+        if (ctx.is_debug()) {
+            LOG_DBG("%sSEQ start at %zu '%s' (%zu children)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
         auto pos = start_pos;
         std::vector<common_peg_ast_id> nodes;
 
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sSEQ child %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sSEQ child %zu: %s at %zu->%zu\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end);
+            }
+
             if (result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sSEQ -> FAIL\n", debug_indent().c_str());
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, result.end);
             }
 
@@ -337,28 +430,65 @@ struct parser_executor {
             }
 
             if (result.need_more_input()) {
+                ctx.parse_depth--;
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sSEQ -> NEED_MORE\n", debug_indent().c_str());
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
             }
 
             pos = result.end;
         }
 
+        ctx.parse_depth--;
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sSEQ -> SUCCESS at %zu->%zu\n", debug_indent().c_str(), start_pos, pos);
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
     common_peg_parse_result operator()(const common_peg_choice_parser & p) {
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sCHOICE start at %zu '%s' (%zu options)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.children.size());
+        }
+        ctx.parse_depth++;
+
         auto pos = start_pos;
-        for (const auto & child_id : p.children) {
+        for (size_t i = 0; i < p.children.size(); i++) {
+            const auto & child_id = p.children[i];
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i, arena.dump(child_id).c_str());
+            }
             auto result = arena.parse(child_id, ctx, pos);
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sCHOICE option %zu: %s\n", debug_indent().c_str(), i,
+                        common_peg_parse_result_type_name(result.type));
+            }
             if (!result.fail()) {
+                ctx.parse_depth--;
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sCHOICE -> %s (option %zu)\n", debug_indent().c_str(),
+                            common_peg_parse_result_type_name(result.type), i);
+                }
                 return result;
             }
         }
 
+        ctx.parse_depth--;
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sCHOICE -> FAIL (no options matched)\n", debug_indent().c_str());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
     }
 
     common_peg_parse_result operator()(const common_peg_repetition_parser & p) {
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sREPEAT start at %zu '%s' (min=%d, max=%d)\n", debug_indent().c_str(), start_pos,
+                    debug_input_snippet(start_pos).c_str(), p.min_count, p.max_count);
+        }
+        ctx.parse_depth++;
+
         auto pos = start_pos;
         int match_count = 0;
         std::vector<common_peg_ast_id> nodes;
@@ -366,14 +496,26 @@ struct parser_executor {
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
             if (pos >= ctx.input.size()) {
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sREPEAT: at end of input, count=%d\n", debug_indent().c_str(), match_count);
+                }
                 break;
             }
 
             auto result = arena.parse(p.child, ctx, pos);
 
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sREPEAT iter %d: %s at %zu->%zu, nodes=%zu\n", debug_indent().c_str(), match_count,
+                        common_peg_parse_result_type_name(result.type), result.start, result.end, result.nodes.size());
+                fprintf(stderr, "%sREPEAT CHILD: %s\n", debug_indent().c_str(), arena.dump(p.child).c_str());
+            }
+
             if (result.success()) {
                 // Prevent infinite loop on empty matches
                 if (result.end == pos) {
+                    if (ctx.is_debug()) {
+                        fprintf(stderr, "%s  REPEAT: empty match, stopping\n", debug_indent().c_str());
+                    }
                     break;
                 }
 
@@ -391,21 +533,43 @@ struct parser_executor {
                     nodes.insert(nodes.end(), result.nodes.begin(), result.nodes.end());
                 }
 
+                ctx.parse_depth--;
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (count=%d, nodes=%zu)\n", debug_indent().c_str(),
+                            match_count, nodes.size());
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, result.end, std::move(nodes));
             }
 
             // Child failed - stop trying
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sREPEAT: child failed, stopping\n", debug_indent().c_str());
+            }
             break;
         }
 
         // Check if we got enough matches
         if (p.min_count > 0 && match_count < p.min_count) {
-            if (pos >= ctx.input.size() && ctx.is_partial) {
+            ctx.parse_depth--;
+            if (pos >= ctx.input.size() && ctx.is_lenient()) {
+                if (ctx.is_debug()) {
+                    fprintf(stderr, "%sREPEAT -> NEED_MORE (not enough matches: %d < %d)\n", debug_indent().c_str(),
+                            match_count, p.min_count);
+                }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos, std::move(nodes));
             }
+            if (ctx.is_debug()) {
+                fprintf(stderr, "%sREPEAT -> FAIL (not enough matches: %d < %d)\n", debug_indent().c_str(), match_count,
+                        p.min_count);
+            }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
         }
 
+        ctx.parse_depth--;
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sREPEAT -> SUCCESS (count=%d, nodes=%zu)\n", debug_indent().c_str(), match_count,
+                    nodes.size());
+        }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos, std::move(nodes));
     }
 
@@ -434,10 +598,10 @@ struct parser_executor {
 
     common_peg_parse_result operator()(const common_peg_any_parser & /* p */) const {
         // Parse a single UTF-8 codepoint (not just a single byte)
-        auto result = parse_utf8_codepoint(ctx.input, start_pos);
+        auto result = common_parse_utf8_codepoint(ctx.input, start_pos);
 
         if (result.status == utf8_parse_result::INCOMPLETE) {
-            if (!ctx.is_partial) {
+            if (!ctx.is_lenient()) {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
             }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos);
@@ -468,7 +632,7 @@ struct parser_executor {
 
         // Try to match up to max_count times (or unlimited if max_count is -1)
         while (p.max_count == -1 || match_count < p.max_count) {
-            auto result = parse_utf8_codepoint(ctx.input, pos);
+            auto result = common_parse_utf8_codepoint(ctx.input, pos);
 
             if (result.status == utf8_parse_result::INCOMPLETE) {
                 if (match_count >= p.min_count) {
@@ -476,7 +640,7 @@ struct parser_executor {
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
                 }
                 // Not enough matches yet
-                if (!ctx.is_partial) {
+                if (!ctx.is_lenient()) {
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
                 }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
@@ -517,7 +681,7 @@ struct parser_executor {
 
         // Check if we got enough matches
         if (match_count < p.min_count) {
-            if (pos >= ctx.input.size() && ctx.is_partial) {
+            if (pos >= ctx.input.size() && ctx.is_lenient()) {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
             }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
@@ -526,31 +690,23 @@ struct parser_executor {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
     }
 
-    static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos) {
+    static common_peg_parse_result handle_escape_sequence(common_peg_parse_context & ctx, size_t start, size_t & pos, const char delimiter) {
         ++pos; // consume '\'
         if (pos >= ctx.input.size()) {
-            if (!ctx.is_partial) {
+            if (!ctx.is_lenient()) {
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
             }
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start, pos);
         }
 
-        switch (ctx.input[pos]) {
-            case '"':
-            case '\\':
-            case '/':
-            case 'b':
-            case 'f':
-            case 'n':
-            case 'r':
-            case 't':
-                ++pos;
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start, pos);
-            case 'u':
-                return handle_unicode_escape(ctx, start, pos);
-            default:
-                // Invalid escape sequence
-                return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
+        char c = ctx.input[pos];
+        if (c == delimiter || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || c == 'r' || c == 't') {
+            ++pos;
+            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start, pos);
+        } else if (c == 'u') {
+            return handle_unicode_escape(ctx, start, pos);
+        } else {
+            return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
         }
     }
 
@@ -558,7 +714,7 @@ struct parser_executor {
         ++pos; // consume 'u'
         for (int i = 0; i < 4; ++i) {
             if (pos >= ctx.input.size()) {
-                if (!ctx.is_partial) {
+                if (!ctx.is_lenient()) {
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start);
                 }
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start, pos);
@@ -571,28 +727,28 @@ struct parser_executor {
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start, pos);
     }
 
-    common_peg_parse_result operator()(const common_peg_json_string_parser & /* p */) {
+    common_peg_parse_result operator()(const common_peg_string_parser & p) {
         auto pos = start_pos;
 
         // Parse string content (without quotes)
         while (pos < ctx.input.size()) {
             char c = ctx.input[pos];
 
-            if (c == '"') {
-                // Found closing quote - success (don't consume it)
+            if (c == p.delimiter) {
+                // Found closing delimiter - success (don't consume it)
                 return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_SUCCESS, start_pos, pos);
             }
 
             if (c == '\\') {
-                auto result = handle_escape_sequence(ctx, start_pos, pos);
+                auto result = handle_escape_sequence(ctx, start_pos, pos, p.delimiter);
                 if (!result.success()) {
                     return result;
                 }
             } else {
-                auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+                auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
 
                 if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
-                    if (!ctx.is_partial) {
+                    if (!ctx.is_lenient()) {
                         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
                     }
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
@@ -607,7 +763,7 @@ struct parser_executor {
         }
 
         // Reached end without finding closing quote
-        if (!ctx.is_partial) {
+        if (!ctx.is_lenient()) {
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos, pos);
         }
         return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, pos);
@@ -621,11 +777,11 @@ struct parser_executor {
         size_t last_valid_pos = start_pos;
 
         while (pos < ctx.input.size()) {
-            auto utf8_result = parse_utf8_codepoint(ctx.input, pos);
+            auto utf8_result = common_parse_utf8_codepoint(ctx.input, pos);
 
             if (utf8_result.status == utf8_parse_result::INCOMPLETE) {
                 // Incomplete UTF-8 sequence
-                if (!ctx.is_partial) {
+                if (!ctx.is_lenient()) {
                     // Input is complete but UTF-8 is incomplete = malformed
                     return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_FAIL, start_pos);
                 }
@@ -655,7 +811,7 @@ struct parser_executor {
             last_valid_pos = pos;
         }
 
-        if (last_valid_pos == ctx.input.size() && ctx.is_partial) {
+        if (last_valid_pos == ctx.input.size() && ctx.is_lenient()) {
             // Reached the end of a partial stream, there might still be more input that we need to consume.
             return common_peg_parse_result(COMMON_PEG_PARSE_RESULT_NEED_MORE_INPUT, start_pos, last_valid_pos);
         }
@@ -694,6 +850,9 @@ struct parser_executor {
 
     common_peg_parse_result operator()(const common_peg_tag_parser & p) {
         // Parse the child
+        if (ctx.is_debug()) {
+            fprintf(stderr, "%sTAG: %s\n", debug_indent().c_str(), p.tag.c_str());
+        }
         auto result = arena.parse(p.child, ctx, start_pos);
 
         if (!result.fail()) {
@@ -731,6 +890,10 @@ struct parser_executor {
         }
         return result;
     }
+
+    common_peg_parse_result operator()(const common_peg_gbnf_parser & p) {
+        return arena.parse(p.child, ctx, start_pos);
+    }
 };
 
 common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx, size_t start) const {
@@ -755,6 +918,31 @@ common_peg_parser_id common_peg_arena::resolve_ref(common_peg_parser_id id) {
     return id;
 }
 
+static void bfs_node(common_peg_ast_arena &arena, std::ostringstream & oss, const common_peg_ast_node & node, int indent) {
+    for (int i = 0; i < indent; i++) {
+        oss << "  ";
+    }
+    oss << "NODE " << node.id;
+    if (!node.rule.empty()) {
+        oss << " (rule " << node.rule << ")";
+    }
+    if (!node.tag.empty()) {
+        oss << " (tag " << node.tag << ")";
+    }
+    oss << " ['" << node.text << "']\n";
+    for (const auto child : node.children) {
+        bfs_node(arena, oss, arena.get(child), indent + 1);
+    }
+}
+
+std::string common_peg_ast_arena::dump() {
+    std::ostringstream oss;
+    for (auto & node : nodes_) {
+        bfs_node(*this, oss, node, 0);
+    }
+    return oss.str();
+}
+
 void common_peg_arena::resolve_refs() {
     // Walk through all parsers and replace refs with their corresponding rule IDs
     for (auto & parser : parsers_) {
@@ -773,7 +961,8 @@ void common_peg_arena::resolve_refs() {
                                  std::is_same_v<T, common_peg_and_parser> ||
                                  std::is_same_v<T, common_peg_not_parser> ||
                                  std::is_same_v<T, common_peg_tag_parser> ||
-                                 std::is_same_v<T, common_peg_atomic_parser>) {
+                                 std::is_same_v<T, common_peg_atomic_parser> ||
+                                 std::is_same_v<T, common_peg_gbnf_parser>) {
                 p.child = resolve_ref(p.child);
             } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
                 p.child = resolve_ref(p.child);
@@ -785,7 +974,7 @@ void common_peg_arena::resolve_refs() {
                                  std::is_same_v<T, common_peg_ref_parser> ||
                                  std::is_same_v<T, common_peg_until_parser> ||
                                  std::is_same_v<T, common_peg_literal_parser> ||
-                                 std::is_same_v<T, common_peg_json_string_parser> ||
+                                 std::is_same_v<T, common_peg_string_parser> ||
                                  std::is_same_v<T, common_peg_chars_parser> ||
                                  std::is_same_v<T, common_peg_any_parser> ||
                                  std::is_same_v<T, common_peg_space_parser>) {
@@ -803,9 +992,21 @@ void common_peg_arena::resolve_refs() {
 }
 
 std::string common_peg_arena::dump(common_peg_parser_id id) const {
+    std::unordered_set<common_peg_parser_id> visited;
+    return dump_impl(id, visited);
+}
+
+std::string common_peg_arena::dump_impl(common_peg_parser_id                       id,
+                                        std::unordered_set<common_peg_parser_id> & visited) const {
+    // Check for cycles
+    if (visited.count(id)) {
+        return "[cycle]";
+    }
+    visited.insert(id);
+
     const auto & parser = parsers_.at(id);
 
-    return std::visit([this](const auto & p) -> std::string {
+    return std::visit([this, &visited](const auto & p) -> std::string {
         using T = std::decay_t<decltype(p)>;
 
         if constexpr (std::is_same_v<T, common_peg_epsilon_parser>) {
@@ -819,24 +1020,29 @@ std::string common_peg_arena::dump(common_peg_parser_id id) const {
         } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
             std::vector<std::string> parts;
             for (const auto & child : p.children) {
-                parts.push_back(dump(child));
+                parts.push_back(dump_impl(child, visited));
             }
             return "Sequence(" + string_join(parts, ", ") + ")";
         } else if constexpr (std::is_same_v<T, common_peg_choice_parser>) {
             std::vector<std::string> parts;
             for (const auto & child : p.children) {
-                parts.push_back(dump(child));
+                parts.push_back(dump_impl(child, visited));
             }
             return "Choice(" + string_join(parts, ", ") + ")";
         } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
             if (p.max_count == -1) {
-                return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", unbounded)";
+                return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) +
+                        ", unbounded)";
             }
-            return "Repetition(" + dump(p.child) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
+            return "Repetition(" + dump_impl(p.child, visited) + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
         } else if constexpr (std::is_same_v<T, common_peg_and_parser>) {
-            return "And(" + dump(p.child) + ")";
+            return "And(" + dump_impl(p.child, visited) + ")";
         } else if constexpr (std::is_same_v<T, common_peg_not_parser>) {
-            return "Not(" + dump(p.child) + ")";
+            return "Not(" + dump_impl(p.child, visited) + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+            return "Atomic(" + dump_impl(p.child, visited) + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
+            return "Gbnf(" + p.grammar + ", " + dump_impl(p.child, visited) + ")";
         } else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
             return "Any";
         } else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
@@ -846,16 +1052,20 @@ std::string common_peg_arena::dump(common_peg_parser_id id) const {
                 return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", unbounded)";
             }
             return "CharRepeat(" + p.pattern + ", " + std::to_string(p.min_count) + ", " + std::to_string(p.max_count) + ")";
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return "JsonString()";
+        } else if constexpr (std::is_same_v<T, common_peg_string_parser>) {
+            return "String(" + std::string(1, p.delimiter) + ")";
         } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
             return "Until(" + string_join(p.delimiters, " | ") + ")";
         } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-            return "Schema(" + dump(p.child) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
+            return "Schema(" + dump_impl(p.child, visited) + ", " + (p.schema ? p.schema->dump() : "null") + ")";
         } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
-            return "Rule(" + p.name + ", " + dump(p.child) + ")";
+            return "Rule(" + p.name + ", " + dump_impl(p.child, visited) + ")";
         } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
             return "Ref(" + p.name + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_tag_parser>) {
+            return "Tag(" + p.tag + ", " + dump(p.child) + ")";
+        } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
+            return "Atomic(" + dump(p.child) + ")";
         } else {
             return "Unknown";
         }
@@ -1054,7 +1264,32 @@ common_peg_arena common_peg_parser_builder::build() {
     return std::move(arena_);
 }
 
+// String primitives
+
+common_peg_parser common_peg_parser_builder::string_content(char delimiter) {
+    return wrap(arena_.add_parser(common_peg_string_parser{delimiter}));
+}
+
+common_peg_parser common_peg_parser_builder::double_quoted_string() {
+    return rule("double-quoted-string", [this]() {
+        return sequence({literal("\""), string_content('"'), literal("\""), space()});
+    });
+}
+
+common_peg_parser common_peg_parser_builder::single_quoted_string() {
+    return rule("single-quoted-string", [this]() {
+        return sequence({literal("'"), string_content('\''), literal("'"), space()});
+    });
+}
+
+common_peg_parser common_peg_parser_builder::quoted_string() {
+    return rule("quoted-string", [this]() {
+        return choice({double_quoted_string(), single_quoted_string()});
+    });
+}
+
 // JSON parsers
+
 common_peg_parser common_peg_parser_builder::json_number() {
    return rule("json-number", [this]() {
         auto digit1_9 = chars("[1-9]", 1, 1);
@@ -1062,13 +1297,17 @@ common_peg_parser common_peg_parser_builder::json_number() {
         auto int_part = choice({literal("0"), sequence({digit1_9, chars("[0-9]", 0, -1)})});
         auto frac = sequence({literal("."), digits});
         auto exp = sequence({choice({literal("e"), literal("E")}), optional(chars("[+-]", 1, 1)), digits});
-        return sequence({optional(literal("-")), int_part, optional(frac), optional(exp), space()});
+        // Negative lookahead: only commit the number when the next character can't extend it.
+        // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
+        // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
+        auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
+        return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
     });
 }
 
 common_peg_parser common_peg_parser_builder::json_string() {
     return rule("json-string", [this]() {
-        return sequence({literal("\""), json_string_content(), literal("\""), space()});
+        return sequence({literal("\""), string_content('"'), literal("\""), space()});
     });
 }
 
@@ -1130,8 +1369,81 @@ common_peg_parser common_peg_parser_builder::json() {
     });
 }
 
-common_peg_parser common_peg_parser_builder::json_string_content() {
-    return wrap(arena_.add_parser(common_peg_json_string_parser{}));
+common_peg_parser common_peg_parser_builder::python_string() {
+    return rule("python-string", [this]() {
+        return choice({double_quoted_string(), single_quoted_string()});
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_number() {
+    return json_number();
+}
+
+common_peg_parser common_peg_parser_builder::python_bool() {
+    return rule("python-bool", [this]() {
+        return sequence({
+            choice({literal("True"), literal("False")}),
+            space()
+        });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_null() {
+    return rule("python-none", [this]() {
+        return sequence({literal("None"), space()});
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_dict() {
+    return rule("python-dict", [this]() {
+        auto ws = space();
+        auto member = sequence({python_string(), ws, literal(":"), ws, python_value()});
+        auto members = sequence({member, zero_or_more(sequence({ws, literal(","), ws, member}))});
+        return sequence({
+            literal("{"),
+            ws,
+            choice({
+                literal("}"),
+                sequence({members, ws, literal("}")})
+            }),
+            ws
+        });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_array() {
+    return rule("python-array", [this]() {
+        auto ws = space();
+        auto elements = sequence({python_value(), zero_or_more(sequence({literal(","), ws, python_value()}))});
+        return sequence({
+            literal("["),
+            ws,
+            choice({
+                literal("]"),
+                sequence({elements, ws, literal("]")})
+            }),
+            ws
+        });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::python_value() {
+    return rule("python-value", [this]() {
+        return choice({
+            python_dict(),
+            python_array(),
+            python_string(),
+            python_number(),
+            python_bool(),
+            python_null()
+        });
+    });
+}
+
+common_peg_parser common_peg_parser_builder::marker() {
+    auto sharp_bracket_parser = literal("<") + until(">") + literal(">");
+    auto square_bracket_parser = literal("[") + until("]") + literal("]");
+    return choice({ sharp_bracket_parser, square_bracket_parser });
 }
 
 common_peg_parser common_peg_parser_builder::json_member(const std::string & key, const common_peg_parser & p) {
@@ -1145,17 +1457,54 @@ common_peg_parser common_peg_parser_builder::json_member(const std::string & key
     });
 }
 
+static std::string gbnf_escape_char_class(uint32_t c) {
+    if (c == '-' || c == ']' || c == '[' || c == '\\') {
+        return "\\" + std::string(1, (char) c);
+    }
+    // Escape whitespace control characters
+    if (c == '\n') {
+        return "\\n";
+    }
+    if (c == '\t') {
+        return "\\t";
+    }
+    if (c == '\r') {
+        return "\\r";
+    }
+
+    // Printable ASCII
+    if (c >= 0x20 && c <= 0x7E) {
+        return std::string(1, (char) c);
+    }
 
-static std::string gbnf_escape_char_class(char c) {
-    switch (c) {
-        case '\n': return "\\n";
-        case '\t': return "\\t";
-        case '\r': return "\\r";
-        case '\\': return "\\\\";
-        case ']':  return "\\]";
-        case '[':  return "\\[";
-        default:   return std::string(1, c);
+    // Hex escape
+    char         buf[16];
+    const char * hex = "0123456789ABCDEF";
+
+    if (c <= 0xFF) {
+        buf[0] = '\\';
+        buf[1] = 'x';
+        buf[2] = hex[(c >> 4) & 0xF];
+        buf[3] = hex[c & 0xF];
+        buf[4] = '\0';
+    } else if (c <= 0xFFFF) {
+        buf[0] = '\\';
+        buf[1] = 'u';
+        buf[2] = hex[(c >> 12) & 0xF];
+        buf[3] = hex[(c >> 8) & 0xF];
+        buf[4] = hex[(c >> 4) & 0xF];
+        buf[5] = hex[c & 0xF];
+        buf[6] = '\0';
+    } else {
+        buf[0] = '\\';
+        buf[1] = 'U';
+        for (int i = 0; i < 8; i++) {
+            buf[2 + i] = hex[(c >> ((7 - i) * 4)) & 0xF];
+        }
+        buf[10] = '\0';
     }
+
+    return std::string(buf);
 }
 
 static std::string gbnf_excluding_pattern(const std::vector<std::string> & strings) {
@@ -1173,12 +1522,12 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
 
         std::string cls;
         cls.reserve(chars.size());
-        for (const auto & ch : chars) {
+        for (uint32_t ch : chars) {
             cls += gbnf_escape_char_class(ch);
         }
 
         if (!pre.empty()) {
-            pattern += gbnf_format_literal(pre) + " [^" + cls + "]";
+            pattern += gbnf_format_literal(common_unicode_cpts_to_utf8(pre)) + " [^" + cls + "]";
         } else {
             pattern += "[^" + cls + "]";
         }
@@ -1208,7 +1557,7 @@ static std::unordered_set<std::string> collect_reachable_rules(
                           std::is_same_v<T, common_peg_chars_parser> ||
                           std::is_same_v<T, common_peg_space_parser> ||
                           std::is_same_v<T, common_peg_any_parser> ||
-                          std::is_same_v<T, common_peg_json_string_parser>) {
+                          std::is_same_v<T, common_peg_string_parser>) {
                 // These parsers do not have any children
             } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
                 for (auto child : p.children) {
@@ -1223,6 +1572,7 @@ static std::unordered_set<std::string> collect_reachable_rules(
                                  std::is_same_v<T, common_peg_not_parser> ||
                                  std::is_same_v<T, common_peg_tag_parser> ||
                                  std::is_same_v<T, common_peg_atomic_parser> ||
+                                 std::is_same_v<T, common_peg_gbnf_parser> ||
                                  std::is_same_v<T, common_peg_schema_parser>) {
                 visit(p.child);
             } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
@@ -1247,6 +1597,52 @@ static std::unordered_set<std::string> collect_reachable_rules(
 
 // GBNF generation implementation
 void common_peg_arena::build_grammar(const common_grammar_builder & builder, bool lazy) const {
+    auto schema_delegates = [](const common_peg_schema_parser & s) -> bool {
+        if (!s.schema) {
+            return true;
+        }
+        if (s.raw && s.schema->contains("type")) {
+            const auto & type_val = s.schema->at("type");
+            if (type_val.is_string() && type_val == "string") {
+                return true;
+            }
+            // Handle nullable types like ["string", "null"] - delegate when the
+            // non-null type is string, since the tagged format uses raw text
+            if (type_val.is_array()) {
+                for (const auto & t : type_val) {
+                    if (t.is_string() && t.get<std::string>() != "null") {
+                        return t.get<std::string>() == "string";
+                    }
+                }
+            }
+        }
+        // Delegate for enum schemas in raw mode - enum values are literal strings
+        if (s.raw && !s.schema->contains("type") && s.schema->contains("enum")) {
+            return true;
+        }
+        return false;
+    };
+
+    // Unwrap the parser so we can properly check if it's a sequence or choice
+    auto effective_parser = [&](common_peg_parser_id id) -> const common_peg_parser_variant & {
+        while (true) {
+            const auto & p = parsers_.at(id);
+            if (const auto * tag = std::get_if<common_peg_tag_parser>(&p)) {
+                id = tag->child;
+            } else if (const auto * atomic = std::get_if<common_peg_atomic_parser>(&p)) {
+                id = atomic->child;
+            } else if (const auto * schema = std::get_if<common_peg_schema_parser>(&p)) {
+                if (schema_delegates(*schema)) {
+                    id = schema->child;
+                } else {
+                    return p;
+                }
+            } else {
+                return p;
+            }
+        }
+    };
+
     // Generate GBNF for a parser
     std::function<std::string(common_peg_parser_id)> to_gbnf = [&](common_peg_parser_id id) -> std::string {
         const auto & parser = parsers_.at(id);
@@ -1263,11 +1659,14 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
             } else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
                 std::string s;
                 for (const auto & child : p.children) {
+                    auto child_gbnf = to_gbnf(child);
+                    if (child_gbnf.empty()) {
+                        continue;
+                    }
                     if (!s.empty()) {
                         s += " ";
                     }
-                    auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
+                    const auto & child_parser = effective_parser(child);
                     if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
                         std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
                         s += "(" + child_gbnf + ")";
@@ -1283,7 +1682,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
                         s += " | ";
                     }
                     auto child_gbnf = to_gbnf(child);
-                    const auto & child_parser = parsers_.at(child);
+                    const auto & child_parser = effective_parser(child);
                     if (std::holds_alternative<common_peg_choice_parser>(child_parser)) {
                         s += "(" + child_gbnf + ")";
                     } else {
@@ -1293,7 +1692,7 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
                 return s;
             } else if constexpr (std::is_same_v<T, common_peg_repetition_parser>) {
                 auto child_gbnf = to_gbnf(p.child);
-                const auto & child_parser = parsers_.at(p.child);
+                const auto & child_parser = effective_parser(p.child);
                 if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
                     std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
                     child_gbnf = "(" + child_gbnf + ")";
@@ -1344,23 +1743,19 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
                     return result + "{" + std::to_string(p.min_count) + "}";
                 }
                 return result + "{" + std::to_string(p.min_count) + "," + std::to_string(p.max_count) + "}";
-            } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-                return R"(( [^"\\] | "\\" ( ["\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
+            } else if constexpr (std::is_same_v<T, common_peg_string_parser>) {
+                const std::string delim(1, p.delimiter);
+                return R"(( [^)" + delim + R"(\\] | "\\" ( [)" + delim + R"(\\/ bfnrt] | "u" [0-9a-fA-F]{4} ) )*)";
             } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
                 if (p.delimiters.empty()) {
                     return ".*";
                 }
                 return gbnf_excluding_pattern(p.delimiters);
             } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
-                if (p.schema) {
-                    if (p.raw && p.schema->contains("type") && p.schema->at("type").is_string() && p.schema->at("type") == "string") {
-                        // TODO: Implement more comprehensive grammar generation for raw strings.
-                        // For now, use the grammar emitted from the underlying parser.
-                        return to_gbnf(p.child);
-                    }
-                    return builder.add_schema(p.name, *p.schema);
+                if (schema_delegates(p)) {
+                    return to_gbnf(p.child);
                 }
-                return to_gbnf(p.child);
+                return builder.add_schema(p.name, *p.schema);
             } else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
                 return p.name;
             } else if constexpr (std::is_same_v<T, common_peg_ref_parser>) {
@@ -1370,6 +1765,8 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
                 return to_gbnf(p.child);
             } else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
                 return to_gbnf(p.child);
+            } else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
+                return p.grammar;
             } else {
                 static_assert(is_always_false_v<T>);
             }
@@ -1475,8 +1872,8 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant &
                 {"min_count", p.min_count},
                 {"max_count", p.max_count}
             };
-        } else if constexpr (std::is_same_v<T, common_peg_json_string_parser>) {
-            return json{{"type", "json_string"}};
+        } else if constexpr (std::is_same_v<T, common_peg_string_parser>) {
+            return json{{"type", "string"}, {"delimiter", std::string(1, p.delimiter)}};
         } else if constexpr (std::is_same_v<T, common_peg_until_parser>) {
             return json{{"type", "until"}, {"delimiters", p.delimiters}};
         } else if constexpr (std::is_same_v<T, common_peg_schema_parser>) {
@@ -1504,6 +1901,8 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant &
                 {"child", p.child},
                 {"tag", p.tag}
             };
+        } else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
+            return json{{"type", "gbnf"}, {"child", p.child}, {"grammar", p.grammar}};
         }
     }, variant);
 }
@@ -1603,8 +2002,15 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         }
         return parser;
     }
-    if (type == "json_string") {
-        return common_peg_json_string_parser{};
+    if (type == "string") {
+        if (!j.contains("delimiter")) {
+            throw std::runtime_error("string parser missing delimiter field.");
+        }
+        std::string delimiter = j["delimiter"];
+        if (delimiter.empty()) {
+            throw std::runtime_error("string parser delimiter is empty.");
+        }
+        return common_peg_string_parser{delimiter[0]};
     }
     if (type == "until") {
         if (!j.contains("delimiters") || !j["delimiters"].is_array()) {
@@ -1659,6 +2065,16 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
         };
     }
 
+    if (type == "gbnf") {
+        if (!j.contains("child") || !j.contains("grammar")) {
+            throw std::runtime_error("gbnf parser missing required fields");
+        }
+        return common_peg_gbnf_parser{
+            j["child"].get<common_peg_parser_id>(),
+            j["grammar"].get<std::string>(),
+        };
+    }
+
     throw std::runtime_error("Unknown parser type: " + type);
 }
 
diff --git a/common/peg-parser.h b/common/peg-parser.h
index 1cd640365f..b6bb05214b 100644
--- a/common/peg-parser.h
+++ b/common/peg-parser.h
@@ -4,6 +4,7 @@
 
 #include <memory>
 #include <unordered_map>
+#include <unordered_set>
 #include <string>
 #include <string_view>
 #include <functional>
@@ -105,12 +106,17 @@ class common_peg_ast_arena {
 
     const common_peg_ast_node & get(common_peg_ast_id id) const { return nodes_.at(id); }
 
+    common_peg_ast_id find_by_tag(const common_peg_ast_node & parent, const std::string & tag, int max_depth = 3) const;
+    common_peg_ast_id find_by_rule(const common_peg_ast_node & parent, const std::string & tag, int max_depth = 3) const;
+
     size_t size() const { return nodes_.size(); }
 
     void clear() { nodes_.clear(); }
 
     void visit(common_peg_ast_id id, const common_peg_ast_visitor & visitor) const;
     void visit(const common_peg_parse_result & result, const common_peg_ast_visitor & visitor) const;
+
+    std::string dump();
 };
 
 struct common_peg_parse_result {
@@ -136,21 +142,43 @@ struct common_peg_parse_result {
     bool success() const { return type == COMMON_PEG_PARSE_RESULT_SUCCESS; }
 };
 
+enum common_peg_parse_flags {
+    COMMON_PEG_PARSE_FLAG_NONE    = 0,
+    COMMON_PEG_PARSE_FLAG_LENIENT = 1 << 0,
+    COMMON_PEG_PARSE_FLAG_DEBUG   = 1 << 1,
+};
+
+inline common_peg_parse_flags operator|(common_peg_parse_flags a, common_peg_parse_flags b) {
+    return static_cast<common_peg_parse_flags>(int(a) | int(b));
+}
+
+inline common_peg_parse_flags & operator|=(common_peg_parse_flags & a, common_peg_parse_flags b) {
+    return a = a | b;
+}
+
+inline common_peg_parse_flags operator&(common_peg_parse_flags a, common_peg_parse_flags b) {
+    return static_cast<common_peg_parse_flags>(int(a) & int(b));
+}
+
+inline common_peg_parse_flags operator~(common_peg_parse_flags a) {
+    return static_cast<common_peg_parse_flags>(~int(a));
+}
+
 struct common_peg_parse_context {
     std::string input;
-    bool is_partial;
+    common_peg_parse_flags flags;
     common_peg_ast_arena ast;
 
     int parse_depth;
 
-    common_peg_parse_context()
-        : is_partial(false), parse_depth(0) {}
+    common_peg_parse_context(common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_NONE)
+        : flags(flags), parse_depth(0) {}
 
-    common_peg_parse_context(const std::string & input)
-        : input(input), is_partial(false), parse_depth(0) {}
+    common_peg_parse_context(const std::string & input, common_peg_parse_flags flags = COMMON_PEG_PARSE_FLAG_NONE)
+        : input(input), flags(flags), parse_depth(0) {}
 
-    common_peg_parse_context(const std::string & input, bool is_partial)
-        : input(input), is_partial(is_partial), parse_depth(0) {}
+    bool is_lenient() const { return flags & COMMON_PEG_PARSE_FLAG_LENIENT; }
+    bool is_debug() const { return flags & COMMON_PEG_PARSE_FLAG_DEBUG; }
 };
 
 class common_peg_arena;
@@ -206,7 +234,9 @@ struct common_peg_chars_parser {
     int max_count;  // -1 for unbounded
 };
 
-struct common_peg_json_string_parser {};
+struct common_peg_string_parser {
+    char delimiter;
+};
 
 struct common_peg_until_parser {
     std::vector<std::string> delimiters;
@@ -240,6 +270,11 @@ struct common_peg_tag_parser {
     std::string tag;
 };
 
+struct common_peg_gbnf_parser {
+    common_peg_parser_id child;
+    std::string grammar;
+};
+
 // Variant holding all parser types
 using common_peg_parser_variant = std::variant<
     common_peg_epsilon_parser,
@@ -254,13 +289,14 @@ using common_peg_parser_variant = std::variant<
     common_peg_any_parser,
     common_peg_space_parser,
     common_peg_chars_parser,
-    common_peg_json_string_parser,
+    common_peg_string_parser,
     common_peg_until_parser,
     common_peg_schema_parser,
     common_peg_rule_parser,
     common_peg_ref_parser,
     common_peg_atomic_parser,
-    common_peg_tag_parser
+    common_peg_tag_parser,
+    common_peg_gbnf_parser
 >;
 
 class common_peg_arena {
@@ -299,6 +335,8 @@ class common_peg_arena {
     friend class common_peg_parser_builder;
 
   private:
+    std::string dump_impl(common_peg_parser_id id, std::unordered_set<common_peg_parser_id> & visited) const;
+
     common_peg_parser_id add_parser(common_peg_parser_variant parser);
     void add_rule(const std::string & name, common_peg_parser_id id);
 
@@ -404,6 +442,18 @@ class common_peg_parser_builder {
     //   S -> A{n}
     common_peg_parser repeat(const common_peg_parser & p, int n) { return repeat(p, n, n); }
 
+    // Matches a double-quoted string: '"' content '"' space
+    common_peg_parser double_quoted_string();
+
+    // Matches a single-quoted string: "'" content "'" space
+    common_peg_parser single_quoted_string();
+
+    // Matches a string that accepts both double-quoted and single-quoted styles.
+    common_peg_parser quoted_string();
+
+    // Matches string content without the surrounding delimiter.
+    common_peg_parser string_content(char delimiter);
+
     // Creates a complete JSON parser supporting objects, arrays, strings, numbers, booleans, and null.
     //   value -> object | array | string | number | true | false | null
     common_peg_parser json();
@@ -414,14 +464,24 @@ class common_peg_parser_builder {
     common_peg_parser json_bool();
     common_peg_parser json_null();
 
-    // Matches JSON string content without the surrounding quotes.
-    // Useful for extracting content within a JSON string.
-    common_peg_parser json_string_content();
-
     // Matches a JSON object member with a key and associated parser as the
     // value.
     common_peg_parser json_member(const std::string & key, const common_peg_parser & p);
 
+    // Creates a complete Python format parser supporting dicts, arrays, strings, numbers, booleans, and None.
+    // Differs from JSON: uses True/False/None, accepts both single and double-quoted strings.
+    //   value -> dict | array | string | number | True | False | None
+    common_peg_parser python_value();
+    common_peg_parser python_dict();
+    common_peg_parser python_string();
+    common_peg_parser python_array();
+    common_peg_parser python_number();
+    common_peg_parser python_bool();
+    common_peg_parser python_null();
+
+    // A marker, i.e. text delimited by a pair of <> or []
+    common_peg_parser marker();
+
     // Wraps a parser with JSON schema metadata for grammar generation.
     // Used internally to convert JSON schemas to GBNF grammar rules.
     common_peg_parser schema(const common_peg_parser & p, const std::string & name, const nlohmann::ordered_json & schema, bool raw = false);
@@ -450,6 +510,10 @@ class common_peg_parser_builder {
     // Unlike rules, you can tag multiple nodes with the same tag.
     common_peg_parser tag(const std::string & tag, const common_peg_parser & p) { return add(common_peg_tag_parser{p.id(), tag}); }
 
+    // Wraps a child parser but emits a custom GBNF grammar string instead of
+    // the child's grammar. Parsing delegates entirely to the child.
+    common_peg_parser gbnf(const common_peg_parser & p, const std::string & grammar) { return add(common_peg_gbnf_parser{p, grammar}); }
+
     void set_root(const common_peg_parser & p);
 
     common_peg_arena build();
diff --git a/common/reasoning-budget.cpp b/common/reasoning-budget.cpp
new file mode 100644
index 0000000000..8f814d9ef5
--- /dev/null
+++ b/common/reasoning-budget.cpp
@@ -0,0 +1,265 @@
+#include "reasoning-budget.h"
+#include "common.h"
+#include "unicode.h"
+
+#include "log.h"
+
+#include <cmath>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+struct token_matcher {
+    std::vector<llama_token> tokens;
+    size_t pos = 0;
+
+    bool advance(llama_token token) {
+        if (tokens.empty()) {
+            return false;
+        }
+
+        if (token == tokens[pos]) {
+            pos++;
+            if (pos >= tokens.size()) {
+                pos = 0;
+                return true;
+            }
+        } else {
+            pos = 0;
+            if (token == tokens[0]) {
+                pos = 1;
+            }
+        }
+        return false;
+    }
+
+    void reset() { pos = 0; }
+};
+
+struct common_reasoning_budget_ctx {
+    const llama_vocab * vocab;
+
+    token_matcher start_matcher;
+    token_matcher end_matcher;
+    std::vector<llama_token> forced_tokens;
+
+    int32_t budget;           // maximum tokens in reasoning block
+    int32_t remaining;        // tokens remaining in budget
+
+    common_reasoning_budget_state state;
+
+    // for forcing
+    size_t force_pos;         // next position in forced_tokens to force
+};
+
+static const char * common_reasoning_budget_name(const common_reasoning_budget_ctx * /*smpl*/) {
+    return "reasoning-budget";
+}
+
+static void common_reasoning_budget_accept(common_reasoning_budget_ctx * smpl, llama_token token) {
+    auto * ctx = (common_reasoning_budget_ctx *)smpl;
+
+    switch (ctx->state) {
+    case REASONING_BUDGET_IDLE:
+    {
+        if (ctx->start_matcher.advance(token)) {
+            ctx->state = REASONING_BUDGET_COUNTING;
+            ctx->remaining = ctx->budget;
+            LOG_INF("reasoning-budget: activated, budget=%d tokens\n", ctx->budget);
+
+            if (ctx->remaining <= 0) {
+                ctx->state = REASONING_BUDGET_FORCING;
+                ctx->force_pos = 0;
+                LOG_INF("reasoning-budget: budget=0, forcing immediately\n");
+            }
+        }
+        break;
+    }
+    case REASONING_BUDGET_COUNTING:
+    case REASONING_BUDGET_WAITING_UTF8:
+    {
+        if (ctx->end_matcher.advance(token)) {
+            ctx->state = REASONING_BUDGET_DONE;
+            LOG_INF("reasoning-budget: deactivated (natural end)\n");
+            break;
+        }
+
+        bool utf8_complete = true;
+        if (ctx->vocab != nullptr) {
+            const std::string piece = common_token_to_piece(ctx->vocab, token, false);
+            utf8_complete = common_utf8_is_complete(piece);
+        }
+
+        if (ctx->state == REASONING_BUDGET_WAITING_UTF8) {
+            if (utf8_complete) {
+                ctx->state = REASONING_BUDGET_FORCING;
+                ctx->force_pos = 0;
+                ctx->end_matcher.reset();
+                LOG_INF("reasoning-budget: UTF-8 complete, now forcing end sequence\n");
+            }
+        } else if (ctx->state == REASONING_BUDGET_COUNTING) {
+            ctx->remaining--;
+            if (ctx->remaining <= 0) {
+                if (utf8_complete) {
+                    ctx->state = REASONING_BUDGET_FORCING;
+                    ctx->force_pos = 0;
+                    ctx->end_matcher.reset();
+                    LOG_INF("reasoning-budget: budget exhausted, forcing end sequence\n");
+                } else {
+                    ctx->state = REASONING_BUDGET_WAITING_UTF8;
+                    ctx->end_matcher.reset();
+                    LOG_INF("reasoning-budget: budget exhausted, waiting for UTF-8 completion\n");
+                }
+            }
+        }
+        break;
+    }
+    case REASONING_BUDGET_FORCING:
+        ctx->force_pos++;
+        if (ctx->force_pos >= ctx->forced_tokens.size()) {
+            ctx->state = REASONING_BUDGET_DONE;
+            LOG_INF("reasoning-budget: forced sequence complete, done\n");
+        }
+        break;
+    case REASONING_BUDGET_DONE:
+        break;
+    }
+}
+
+static void common_reasoning_budget_apply(struct common_reasoning_budget_ctx * smpl, llama_token_data_array * cur_p) {
+    auto * ctx = (common_reasoning_budget_ctx *)smpl;
+    if (!ctx) {
+        return;
+    }
+    if (ctx->state != REASONING_BUDGET_FORCING) {
+        // passthrough — don't modify logits
+        return;
+    }
+
+    if (ctx->force_pos >= ctx->forced_tokens.size()) {
+        return;
+    }
+
+    const llama_token forced = ctx->forced_tokens[ctx->force_pos];
+
+    // set all logits to -inf except the forced token
+    for (size_t i = 0; i < cur_p->size; i++) {
+        if (cur_p->data[i].id != forced) {
+            cur_p->data[i].logit = -INFINITY;
+        }
+    }
+}
+
+static void common_reasoning_budget_reset(common_reasoning_budget_ctx * smpl) {
+    auto * ctx = (common_reasoning_budget_ctx *)smpl;
+    ctx->state = REASONING_BUDGET_IDLE;
+    ctx->remaining = ctx->budget;
+    ctx->start_matcher.reset();
+    ctx->end_matcher.reset();
+    ctx->force_pos = 0;
+}
+
+// forward declaration for use in clone
+static struct common_reasoning_budget_ctx * common_reasoning_budget_init_state(
+    const struct llama_vocab * vocab, const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens, const std::vector<llama_token> & forced_tokens,
+    int32_t budget, common_reasoning_budget_state initial_state);
+
+static struct common_reasoning_budget_ctx * common_reasoning_budget_clone(const struct common_reasoning_budget_ctx * smpl) {
+    const auto * ctx = (const common_reasoning_budget_ctx *)smpl;
+    return common_reasoning_budget_init_state(
+        ctx->vocab,
+        ctx->start_matcher.tokens,
+        ctx->end_matcher.tokens,
+        ctx->forced_tokens,
+        ctx->budget,
+        ctx->state);
+}
+
+static void common_reasoning_budget_free(struct common_reasoning_budget_ctx * smpl) {
+    delete (common_reasoning_budget_ctx *)smpl;
+}
+
+//static struct llama_sampler_i common_reasoning_budget_i = {
+//    /* .name              = */ common_reasoning_budget_name,
+//    /* .accept            = */ common_reasoning_budget_accept,
+//    /* .apply             = */ common_reasoning_budget_apply,
+//    /* .reset             = */ common_reasoning_budget_reset,
+//    /* .clone             = */ common_reasoning_budget_clone,
+//    /* .free              = */ common_reasoning_budget_free,
+//    /* .backend_init      = */ nullptr,
+//    /* .backend_accept    = */ nullptr,
+//    /* .backend_apply     = */ nullptr,
+//    /* .backend_set_input = */ nullptr,
+//};
+
+static common_reasoning_budget_ctx * common_reasoning_budget_init_state(
+    const struct llama_vocab * vocab,
+    const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens,
+    const std::vector<llama_token> & forced_tokens,
+    int32_t                                budget,
+    common_reasoning_budget_state          initial_state) {
+    // promote COUNTING with budget <= 0 to FORCING
+    if (initial_state == REASONING_BUDGET_COUNTING && budget <= 0) {
+        initial_state = REASONING_BUDGET_FORCING;
+    }
+
+    return
+        /* .ctx   = */ new common_reasoning_budget_ctx{
+        /* .vocab         = */ vocab,
+        /* .start_matcher = */ { start_tokens, 0 },
+        /* .end_matcher   = */ { end_tokens, 0 },
+        /* .forced_tokens = */ forced_tokens,
+        /* .budget        = */ budget,
+        /* .remaining     = */ budget,
+        /* .state         = */ initial_state,
+        /* .force_pos     = */ 0,
+    };
+
+}
+
+struct common_reasoning_budget_ctx *  common_reasoning_budget_init(
+    const struct llama_vocab * vocab,
+    const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens,
+    const std::vector<llama_token> & forced_tokens,
+    int32_t                          budget,
+    const std::vector<llama_token> & prefill_tokens) {
+    // Determine initial state from prefill: COUNTING if the prefill begins with
+    // the start sequence but does not also contain the end sequence after it.
+    common_reasoning_budget_state initial_state = REASONING_BUDGET_IDLE;
+    if (!prefill_tokens.empty() && !start_tokens.empty() &&
+        prefill_tokens.size() >= start_tokens.size() &&
+        std::equal(start_tokens.begin(), start_tokens.end(), prefill_tokens.begin())) {
+        initial_state = REASONING_BUDGET_COUNTING;
+        // If the end sequence also follows the start in the prefill, reasoning
+        // was opened and immediately closed — stay IDLE.
+        if (!end_tokens.empty() &&
+            prefill_tokens.size() >= start_tokens.size() + end_tokens.size()) {
+            auto end_start = prefill_tokens.end() - (ptrdiff_t)end_tokens.size();
+            if (end_start >= prefill_tokens.begin() + (ptrdiff_t)start_tokens.size() &&
+                std::equal(end_tokens.begin(), end_tokens.end(), end_start)) {
+                initial_state = REASONING_BUDGET_IDLE;
+            }
+        }
+    }
+    return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
+}
+
+common_reasoning_budget_ctx *  common_reasoning_budget_init(
+    const struct llama_vocab * vocab,
+    const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens,
+    const std::vector<llama_token> & forced_tokens,
+    int32_t                          budget,
+    common_reasoning_budget_state    initial_state) {
+    return common_reasoning_budget_init_state(vocab, start_tokens, end_tokens, forced_tokens, budget, initial_state);
+}
+
+common_reasoning_budget_state common_reasoning_budget_get_state(const common_reasoning_budget_ctx * smpl) {
+    if (!smpl) {
+        return REASONING_BUDGET_IDLE;
+    }
+    return ((const common_reasoning_budget_ctx *)smpl)->state;
+}
diff --git a/common/reasoning-budget.h b/common/reasoning-budget.h
new file mode 100644
index 0000000000..17778ecfe0
--- /dev/null
+++ b/common/reasoning-budget.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include "llama.h"
+
+#include <cstdint>
+#include <vector>
+
+enum common_reasoning_budget_state {
+    REASONING_BUDGET_IDLE,         // waiting for start sequence
+    REASONING_BUDGET_COUNTING,     // counting down tokens
+    REASONING_BUDGET_FORCING,      // forcing budget message + end sequence
+    REASONING_BUDGET_WAITING_UTF8, // budget exhausted, waiting for UTF-8 completion
+    REASONING_BUDGET_DONE,         // passthrough forever
+};
+
+// Creates a reasoning budget sampler that limits token generation inside a
+// reasoning block (e.g. between <think> and </think>).
+//
+// State machine: IDLE -> COUNTING -> WAITING_UTF8 -> FORCING -> DONE
+//   IDLE:         passthrough, watching for start_tokens sequence
+//   COUNTING:     counting down remaining tokens, watching for natural end_tokens
+//   WAITING_UTF8: budget exhausted, allowing tokens to complete a UTF-8 sequence
+//   FORCING:      forces forced_tokens token-by-token (all other logits -> -inf)
+//   DONE:         passthrough forever
+//
+// Parameters:
+//   vocab          - vocabulary (used for UTF-8 boundary detection; can be nullptr)
+//   start_tokens   - token sequence that activates counting
+//   end_tokens     - token sequence for natural deactivation
+//   forced_tokens  - token sequence forced when budget expires
+//   budget         - max tokens allowed in the reasoning block
+//   prefill_tokens - tokens already present in the prompt (generation prompt);
+//                    used to determine the initial state: COUNTING if they begin
+//                    with start_tokens (but don't also end with end_tokens),
+//                    IDLE otherwise. COUNTING with budget <= 0 is promoted to FORCING.
+//
+
+struct common_reasoning_budget_ctx * common_reasoning_budget_init(
+        const struct llama_vocab       * vocab,
+        const std::vector<llama_token> & start_tokens,
+        const std::vector<llama_token> & end_tokens,
+        const std::vector<llama_token> & forced_tokens,
+        int32_t                          budget,
+        const std::vector<llama_token> & prefill_tokens = {});
+
+// Variant that takes an explicit initial state (used by tests and clone).
+// COUNTING with budget <= 0 is promoted to FORCING.
+struct common_reasoning_budget_ctx * common_reasoning_budget_init(
+    const struct llama_vocab * vocab,
+    const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens,
+    const std::vector<llama_token> & forced_tokens,
+    int32_t                          budget,
+    common_reasoning_budget_state    initial_state);
+
+common_reasoning_budget_state common_reasoning_budget_get_state(const common_reasoning_budget_ctx * smpl);
diff --git a/common/regex-partial.cpp b/common/regex-partial.cpp
index c3138f4547..c89b7762c6 100644
--- a/common/regex-partial.cpp
+++ b/common/regex-partial.cpp
@@ -102,7 +102,7 @@ std::string regex_to_reversed_partial_regex(const std::string & pattern) {
                 auto is_star = *it == '*';
                 ++it;
                 if (it != end && is_star) {
-                    if (*it == '?') {
+                    if (it != end && *it == '?') {
                         ++it;
                     }
                 }
diff --git a/common/sampling.cpp b/common/sampling.cpp
index 3c0b748455..24106338f4 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -2,6 +2,8 @@
 #include "sampling.h"
 #include "llama-vocab.h"
 #include "common.h"
+#include "reasoning-budget.cpp"
+
 #include <random>
 #include <nlohmann/json.hpp>
 using json = nlohmann::ordered_json;
@@ -13,12 +15,14 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
 
     result->params  = params;
     result->grammar = nullptr;
-
+    result->rbudget = nullptr;
 
     struct llama_grammar* grmr;
-    if (params.grammar.compare(0, 11, "%llguidance") == 0) {
+    const std::string & grammar_str = common_grammar_value(params.grammar);
+    if (grammar_str.compare(0, 11, "%llguidance") == 0) {
 #ifdef LLAMA_USE_LLGUIDANCE
         grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
+        result->grammar = grmr;
 #else
         GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
 #endif // LLAMA_USE_LLGUIDANCE
@@ -68,18 +72,66 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
             trigger_patterns_c.push_back(regex.c_str());
         }
 
-        grmr = params.grammar_lazy
-            ? llama_sampler_init_grammar_lazy_patterns(vocab, params.grammar.c_str(), "root",
-                trigger_patterns_c.data(), trigger_patterns_c.size(),
-                trigger_tokens.data(), trigger_tokens.size())
-            : llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
+        if (!grammar_str.empty()) {
+            grmr = params.grammar_lazy
+                ? llama_sampler_init_grammar_lazy_patterns(vocab, grammar_str.c_str(), "root",
+                    trigger_patterns_c.data(), trigger_patterns_c.size(),
+                    trigger_tokens.data(), trigger_tokens.size())
+                : llama_sampler_init_grammar(vocab, grammar_str.c_str(), "root");
+            if (grmr) {
+                result->prev.resize(params.n_prev);
+                result->n_valid = 0;
+                result->grammar_str = grammar_str;
+                result->grammar_root = "root";
+                result->grammar = grmr;
+            }
+        }
+    }
+    
+
+
+
+    // Feed generation prompt tokens to the grammar sampler so it advances past
+    // tokens the template already placed in the prompt.
+    // Only applies to output-format and tool-call grammars; user-supplied grammars must not be prefilled.
+    std::vector<llama_token> prefill_tokens;
+    if (!params.generation_prompt.empty() && common_grammar_needs_prefill(params.grammar)) {
+        GGML_ASSERT(vocab != nullptr);
+        prefill_tokens = common_tokenize(vocab, params.generation_prompt, false, true);
+        if (!prefill_tokens.empty()) {
+            std::string first_token = common_token_to_piece(vocab, prefill_tokens[0], true);
+            if (std::isspace(first_token[0]) && !std::isspace(params.generation_prompt[0])) {
+                // Some tokenizers will add a space before the first special token, need to remove
+                prefill_tokens = std::vector<llama_token>(prefill_tokens.begin() + 1, prefill_tokens.end());
+            }
+        }
+
+        if (grmr && !params.grammar_lazy) {
+            try {
+                for (const auto & token : prefill_tokens) {
+                    llama_grammar_accept_impl(*grmr, vocab, nullptr, token);
+                    LOG_DBG("%s: accepted prefill token (%d)\n", __func__, token);
+                }
+            }
+            catch (std::exception & e) {
+                LOG_ERR("%s: error initializing grammar sampler for grammar:\n%s\n\nGeneration prompt:\n'%s'\n", __func__,
+                    common_grammar_value(params.grammar).c_str(), params.generation_prompt.c_str());
+                throw e;
+            }
+        }
+    }
 
-        result->prev.resize(params.n_prev);
-        result->n_valid = 0;
-	    result->grammar_str = params.grammar;
-	    result->grammar_root = "root";
+    // reasoning budget sampler (skip when budget is unlimited unless a lazy grammar is active, which needs rbudget for thinking-block suppression)
+    if (!params.reasoning_budget_start.empty() && !params.reasoning_budget_end.empty() && (params.grammar_lazy || params.reasoning_budget_tokens >= 0)) {
+        result->rbudget = common_reasoning_budget_init(
+            vocab,
+            params.reasoning_budget_start,
+            params.reasoning_budget_end,
+            params.reasoning_budget_forced,
+            params.reasoning_budget_tokens < 0 ? INT_MAX : params.reasoning_budget_tokens,
+            prefill_tokens);
     }
-    result->grammar = grmr;
+
     llama_sampling_set_rng_seed(result, params.seed);
     for (const auto& cnstr : params.samplers_sequence)
     {
@@ -113,11 +165,16 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
 }
 
 void common_sampler_free(struct common_sampler * ctx) {
-    if (ctx->grammar != NULL) {
+    if (!ctx) {
+        return;
+    }
+    if (ctx->grammar) {
         llama_grammar_free(ctx->grammar);
     }
-    if (ctx->smpl !=NULL)
+    if (ctx->smpl)
         llama_sampler_dry_free(ctx->smpl);
+    if (ctx->rbudget)
+        common_reasoning_budget_free(ctx->rbudget);
     delete ctx;
 }
 
@@ -173,6 +230,9 @@ void common_sampler_clone(common_sampler * src, common_sampler * dst) {
 
     dst->prev = src->prev;
     dst->smpl = llama_sampler_dry_clone(src->smpl);
+    if (src->rbudget) {
+        dst->rbudget = common_reasoning_budget_clone(src->rbudget);
+    }
 }
 
 llama_token llama_sampling_last(common_sampler * ctx) {
@@ -376,12 +436,27 @@ static void sampler_queue(
     }
 }
 
+static bool grammar_should_apply(struct common_sampler * gsmpl) {
+    if (!gsmpl->grammar) {
+        return false;
+    }
+    if (!gsmpl->rbudget) {
+        return true;
+    }
+    if (gsmpl->params.grammar_lazy) {
+        // if grammar is lazy, only apply when reasoning budget is not active
+        const auto state = common_reasoning_budget_get_state(gsmpl->rbudget);
+        return state == REASONING_BUDGET_IDLE || state == REASONING_BUDGET_DONE;
+    }
+    return true;
+}
+
 static llama_token llama_sampling_sample_impl(
                   struct common_sampler * ctx_sampling,
                   struct llama_context * ctx_main,
                   struct llama_context * ctx_cfg,
                   const int idx,
-                  bool is_resampling) {
+                  bool grammar_first) {
     const common_params_sampling & params = ctx_sampling->params;
 
     const float   temp            = params.temp;
@@ -391,19 +466,24 @@ static llama_token llama_sampling_sample_impl(
     const float   adaptive_target = params.adaptive_target;
 
     std::vector<float> original_logits;
-    llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, /* apply_grammar= */ is_resampling, &original_logits);
+    llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, /* grammar_first= */ grammar_first, &original_logits);
     llama_token_data_array & cur_p = ctx_sampling->cur_p;
-    if (ctx_sampling->grammar != NULL && !is_resampling) {
+    if (ctx_sampling->grammar != NULL && !grammar_first) {
         GGML_ASSERT(!original_logits.empty());
     }
+    auto & rbudget = ctx_sampling->rbudget;
+
     llama_token id = 0;
+    float * logits = llama_get_logits_ith(ctx_main, idx);
+    // apply reasoning budget first
+    common_reasoning_budget_apply(rbudget, &cur_p);
     // Sample grammar first for resampling
-    if (ctx_sampling->grammar != NULL && is_resampling) {
-        float* logits = llama_get_logits_ith(ctx_main, idx);
+    if (ctx_sampling->grammar != NULL && grammar_first && grammar_should_apply(ctx_sampling)) {
         // Apply grammar constraints to all candidates
-        llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p);
+        llama_grammar_apply(ctx_sampling->grammar, ctx_main, &cur_p);
     }
 
+    // llama_sampler_apply
     if (temp < 0.0) {
         // greedy sampling, with probs
         llama_sample_softmax(ctx_main, &cur_p);
@@ -434,7 +514,12 @@ static llama_token llama_sampling_sample_impl(
         }
     }
 
-    if (ctx_sampling->grammar != NULL && !is_resampling) {
+    id = cur_p.data[cur_p.selected].id;
+    if (grammar_first || !grammar_should_apply(ctx_sampling)) {
+        return id;
+    }
+
+    if (ctx_sampling->grammar != NULL && !grammar_first && grammar_should_apply(ctx_sampling)) {
         // Get a pointer to the logits
         float * logits = llama_get_logits_ith(ctx_main, idx);
 
@@ -443,7 +528,7 @@ static llama_token llama_sampling_sample_impl(
         llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
 
         // Apply grammar constraints to the single token
-        llama_grammar_sample(ctx_sampling->grammar, ctx_main, &single_token_data_array);
+        llama_grammar_apply(ctx_sampling->grammar, ctx_main, &single_token_data_array);
 
         // Check if the token is valid according to the grammar by seeing if its logit has been set to -INFINITY
         bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
@@ -468,7 +553,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
                   struct llama_context * ctx_main,
                   struct llama_context * ctx_cfg,
                   const int idx,
-                  bool apply_grammar,
+                  bool grammar_first,
                   std::vector<float> * original_logits) {
     const common_params_sampling & params = ctx_sampling->params;
 
@@ -487,7 +572,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
     // Get a pointer to the logits
     float * logits = llama_get_logits_ith(ctx_main, idx);
 
-    if (ctx_sampling->grammar != NULL && !apply_grammar) {
+    if (ctx_sampling->grammar != NULL && !grammar_first) {
         GGML_ASSERT(original_logits != NULL);
         // Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
         *original_logits = {logits, logits + n_vocab};
@@ -540,8 +625,8 @@ static llama_token_data_array llama_sampling_prepare_impl(
     }
 
     // apply grammar checks before sampling logic
-    if (apply_grammar && ctx_sampling->grammar != NULL) {
-        llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p);
+    if (grammar_first && ctx_sampling->grammar != NULL) {
+        llama_grammar_apply(ctx_sampling->grammar, ctx_main, &cur_p);
     }
 
     return cur_p;
@@ -570,27 +655,32 @@ llama_token_data_array llama_sampling_prepare(
                   struct llama_context * ctx_main,
                   struct llama_context * ctx_cfg,
                   const int idx,
-                  bool apply_grammar,
+                  bool grammar_first,
                   std::vector<float> * original_logits) {
-    return llama_sampling_prepare_impl(ctx_sampling,ctx_main, ctx_cfg, idx, apply_grammar, original_logits);
+    return llama_sampling_prepare_impl(ctx_sampling,ctx_main, ctx_cfg, idx, grammar_first, original_logits);
 }
 
 void common_sampler_accept(
         struct common_sampler * ctx_sampling,
         struct llama_context * ctx_main,
-        llama_token id,
-        bool apply_grammar) {
+        llama_token token,
+        bool accept_grammar) {
     if (ctx_sampling->prev.size() > 0) {
-    ctx_sampling->prev.erase(ctx_sampling->prev.begin());
+        ctx_sampling->prev.erase(ctx_sampling->prev.begin());
+    }
+    ctx_sampling->prev.push_back(token);
 
+    // grammar_should_apply() checks the reasoning budget state, so calculate this before we accept
+    accept_grammar = accept_grammar && grammar_should_apply(ctx_sampling);
+    if (ctx_sampling->rbudget) {
+        common_reasoning_budget_accept(ctx_sampling->rbudget, token);
     }
-    ctx_sampling->prev.push_back(id);
 
-    if (ctx_sampling->grammar != NULL && apply_grammar) {
-        llama_grammar_accept_token(ctx_sampling->grammar, ctx_main, id);
+    if (ctx_sampling->grammar != NULL && accept_grammar) {
+        llama_grammar_accept_token(ctx_sampling->grammar, ctx_main, token);
     }
     if (ctx_sampling->smpl) {
-        llama_sampler_dry_accept(ctx_sampling->smpl, id);
+        llama_sampler_dry_accept(ctx_sampling->smpl, token);
     }
 }
 
diff --git a/common/sampling.h b/common/sampling.h
index 94042fe1eb..fafdc69320 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -2,6 +2,7 @@
 
 #include "llama.h"
 #include "llama-grammar.h"
+#include "reasoning-budget.h"
 #include <set>
 #include <random>
 #include <string>
@@ -41,6 +42,43 @@ struct common_grammar_trigger {
 };
 
 
+// Grammar type enumeration
+enum common_grammar_type {
+    COMMON_GRAMMAR_TYPE_NONE,           // no grammar set
+    COMMON_GRAMMAR_TYPE_USER,           // user-provided GBNF (--grammar / "grammar" API field)
+    COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT,  // auto-generated from JSON schema (--json-schema / "json_schema" API field)
+    COMMON_GRAMMAR_TYPE_TOOL_CALLS,     // auto-generated by chat template parser for function calling
+};
+
+// Grammar variant struct with type and grammar string
+struct common_grammar {
+    common_grammar_type type = COMMON_GRAMMAR_TYPE_NONE;
+    std::string grammar;
+
+    // Default constructor - no grammar
+    common_grammar() = default;
+
+    // Constructor with type and grammar string
+    common_grammar(common_grammar_type t, std::string g) : type(t), grammar(std::move(g)) {
+        GGML_ASSERT(type != COMMON_GRAMMAR_TYPE_NONE || !grammar.empty());
+    }
+
+    // Check if a grammar is set
+    bool empty() const { return type == COMMON_GRAMMAR_TYPE_NONE || grammar.empty(); }
+};
+
+// Returns the raw grammar string, or empty string if no grammar is set.
+inline const std::string & common_grammar_value(const common_grammar & g) {
+    return g.grammar;
+}
+
+// Returns true when the generation_prompt should be prefilled into the grammar sampler.
+// Only output-format and tool-call grammars need prefill; user-supplied grammars must not be prefilled.
+inline bool common_grammar_needs_prefill(const common_grammar & g) {
+    return g.type == COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT
+        || g.type == COMMON_GRAMMAR_TYPE_TOOL_CALLS;
+}
+
 
 // sampling parameters
 typedef struct common_params_sampling {
@@ -93,7 +131,8 @@ typedef struct common_params_sampling {
     };
 
 
-    std::string grammar;  // optional BNF-like grammar to constrain sampling
+    //std::string grammar;  // optional BNF-like grammar to constrain sampling
+    common_grammar              grammar;      // optional grammar constraint (user / output-format / tool-calls)
     bool                                grammar_lazy = false;
     std::vector<common_grammar_trigger> grammar_triggers; // optional triggers (for lazy grammars)
     std::set<llama_token>               preserved_tokens;
@@ -104,6 +143,20 @@ typedef struct common_params_sampling {
 
     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
 
+    // The assistant generation prompt already prefilled into the prompt.
+    // Fed to the grammar sampler (to advance past pre-existing tokens) and used
+    // to determine the reasoning budget sampler's initial state.
+    // Only applied when the grammar is of output-format or tool-calls type.
+    std::string generation_prompt;
+
+    // reasoning budget sampler parameters
+    // these are populated by the server/CLI based on chat template params
+    int32_t                  reasoning_budget_tokens = -1;   // -1 = disabled, >= 0 = token budget
+    std::vector<llama_token> reasoning_budget_start;           // start tag token sequence
+    std::vector<llama_token> reasoning_budget_end;             // end tag token sequence
+    std::vector<llama_token> reasoning_budget_forced;          // forced sequence (message + end tag)
+
+
     std::vector<llama_token> penalty_prompt_tokens;
     bool                     use_penalty_prompt_tokens = false;
 } llama_sampling_params;
@@ -129,6 +182,8 @@ struct common_sampler {
 
     llama_sampler_adaptive_p * adapt_p_ctx;    // adaptive p sampler
 
+    common_reasoning_budget_ctx * rbudget; // reasoning budget sampler
+
     size_t n_valid; // Number of correct top tokens with correct probabilities.
 
     llama_token_data_array cur_p; // current candidates
diff --git a/common/unicode.cpp b/common/unicode.cpp
index 56ab0f468e..f71fe56783 100644
--- a/common/unicode.cpp
+++ b/common/unicode.cpp
@@ -1,14 +1,20 @@
 #include "unicode.h"
 
+#include <algorithm>
+#include <cassert>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
 // implementation adopted from src/unicode.cpp
 
-size_t utf8_sequence_length(unsigned char first_byte) {
+size_t common_utf8_sequence_length(unsigned char first_byte) {
     const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
     uint8_t highbits = static_cast<uint8_t>(first_byte) >> 4;
     return lookup[highbits];
 }
 
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset) {
     if (offset >= input.size()) {
         return utf8_parse_result(utf8_parse_result::INCOMPLETE);
     }
@@ -62,3 +68,57 @@ utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset) {
     // Invalid first byte
     return utf8_parse_result(utf8_parse_result::INVALID);
 }
+
+bool common_utf8_is_complete(const std::string & s) {
+    if (s.empty()) {
+        return true;
+    }
+    for (int i = 1; i <= std::min(4, (int)s.size()); i++) {
+        unsigned char c = s[s.size() - i];
+        if ((c & 0xC0) != 0x80) {
+            int expected = (c >= 0xF0) ? 4 : (c >= 0xE0) ? 3 : (c >= 0xC0) ? 2 : 1;
+            return i >= expected;
+        }
+    }
+    return false;
+}
+
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
+    std::string result;
+    for (size_t i = 0; i < cps.size(); ++i) {
+        result.append(common_unicode_cpt_to_utf8(cps[i]));
+    }
+    return result;
+}
+
+std::string common_unicode_cpt_to_utf8(uint32_t cpt) {
+    std::string result;
+
+    if (/* 0x00 <= cpt && */ cpt <= 0x7f) {
+        result.push_back(cpt);
+        return result;
+    }
+    if (0x80 <= cpt && cpt <= 0x7ff) {
+        result.push_back(0xc0 | ((cpt >> 6) & 0x1f));
+        result.push_back(0x80 | (cpt & 0x3f));
+        return result;
+    }
+    if (0x800 <= cpt && cpt <= 0xffff) {
+        result.push_back(0xe0 | ((cpt >> 12) & 0x0f));
+        result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+        result.push_back(0x80 | (cpt & 0x3f));
+        return result;
+    }
+    if (0x10000 <= cpt && cpt <= 0x10ffff) {
+        result.push_back(0xf0 | ((cpt >> 18) & 0x07));
+        result.push_back(0x80 | ((cpt >> 12) & 0x3f));
+        result.push_back(0x80 | ((cpt >> 6) & 0x3f));
+        result.push_back(0x80 | (cpt & 0x3f));
+        return result;
+    }
+
+    throw std::invalid_argument("invalid codepoint");
+}
+
+
+
diff --git a/common/unicode.h b/common/unicode.h
index 9d9e8e1227..9b32fa19d6 100644
--- a/common/unicode.h
+++ b/common/unicode.h
@@ -2,6 +2,8 @@
 
 #include <cstdint>
 #include <string_view>
+#include <vector>
+#include <string>
 
 // UTF-8 parsing utilities for streaming-aware unicode support
 
@@ -16,7 +18,13 @@ struct utf8_parse_result {
 
 // Determine the expected length of a UTF-8 sequence from its first byte
 // Returns 0 for invalid first bytes
-size_t utf8_sequence_length(unsigned char first_byte);
+size_t common_utf8_sequence_length(unsigned char first_byte);
+
+// Check if a string ends with a complete UTF-8 sequence.
+bool common_utf8_is_complete(const std::string & s);
 
 // Parse a single UTF-8 codepoint from input
-utf8_parse_result parse_utf8_codepoint(std::string_view input, size_t offset);
+utf8_parse_result common_parse_utf8_codepoint(std::string_view input, size_t offset);
+
+std::string common_unicode_cpts_to_utf8(const std::vector<uint32_t> & cps);
+std::string common_unicode_cpt_to_utf8(uint32_t cpt);
diff --git a/docs/autoparser.md b/docs/autoparser.md
new file mode 100644
index 0000000000..adc4d43ed6
--- /dev/null
+++ b/docs/autoparser.md
@@ -0,0 +1,533 @@
+# Auto-Parser Architecture
+
+The auto-parser automatically analyzes chat templates to determine how to parse model outputs, including content, reasoning, and tool calls.
+
+## Overview
+
+The unified auto-parser uses a pure differential, compositional approach (inspired by the `git diff` algorithm) to analyze chat templates:
+
+**Core Philosophy**:
+
+- **Minimize Hardcoded Patterns**: All markers extracted through template comparison (the only heuristic is JSON detection to distinguish `JSON_NATIVE` from tag-based formats)
+- **Compositional Architecture**: Separate analyzer structs for reasoning, content, and tools — each responsible for its own analysis and parser construction
+
+**Analysis + Parser Building in Two Steps**:
+
+1. `autoparser::autoparser tmpl_analysis(tmpl)` — runs all differential comparisons and populates the analysis structs
+2. `autoparser::peg_generator::generate_parser(tmpl, generation_params, tmpl_analysis)` — uses the analysis to build a PEG parser and optional GBNF grammar
+
+## Data Structures
+
+All structs are defined in [common/chat-auto-parser.h](common/chat-auto-parser.h).
+
+### Top-Level: `autoparser` (main analyzer and generator)
+
+[common/chat-auto-parser.h:367-388](common/chat-auto-parser.h#L367-L388) — top-level analysis result aggregating `jinja_caps`, `reasoning`, `content`, and `tools` sub-analyses, plus `preserved_tokens` (union of all non-empty markers).
+
+### `analyze_reasoning`
+
+[common/chat-auto-parser.h:254-274](common/chat-auto-parser.h#L254-L274) — reasoning analysis result: `mode` enum, `start` marker (e.g. `<think>`), and `end` marker (e.g. `</think>`).
+
+### `analyze_content`
+
+[common/chat-auto-parser.h:280-295](common/chat-auto-parser.h#L280-L295) — content analysis result: `mode` enum, `start`/`end` markers, and `requires_nonnull_content` flag.
+
+### `analyze_tools` and its sub-structs
+
+- [common/chat-auto-parser.h:176-194](common/chat-auto-parser.h#L176-L194) — `tool_format_analysis`: `mode` enum, `section_start/end`, `per_call_start/end`, JSON field names (`function_field`, `name_field`, `args_field`, `id_field`, `gen_id_field`), and format flags (`fun_name_is_key`, `tools_array_wrapped`)
+- [common/chat-auto-parser.h:196-200](common/chat-auto-parser.h#L196-L200) — `tool_function_analysis`: `name_prefix`, `name_suffix`, `close` markers around function names
+- [common/chat-auto-parser.h:202-210](common/chat-auto-parser.h#L202-L210) — `tool_arguments_analysis`: `start/end` container markers, `name_prefix/suffix`, `value_prefix/suffix`, `separator`
+- [common/chat-auto-parser.h:212-217](common/chat-auto-parser.h#L212-L217) — `tool_id_analysis`: `pos` enum, `prefix`/`suffix` markers around call ID values
+- [common/chat-auto-parser.h:301-361](common/chat-auto-parser.h#L301-L361) — `analyze_tools`: aggregates the four sub-structs above
+
+### Enums
+
+**`reasoning_mode`**: How the template handles reasoning/thinking blocks.
+
+| Value           | Description                                                                       |
+|-----------------|-----------------------------------------------------------------------------------|
+| `NONE`          | No reasoning markers detected                                                     |
+| `TAG_BASED`     | Tag-based: `<think>...</think>` (start can be empty for delimiter-style formats)  |
+| `TOOLS_ONLY`    | Reasoning only appears in tool call responses, not plain content                  |
+
+**Generation Prompt & Reasoning Prefill**: Computed in `common_chat_templates_apply_jinja` before invoking either the specialized handlers or the auto-parser, by rendering the template twice — once with `add_generation_prompt=false` and once with `add_generation_prompt=true` — and storing the diff suffix as `generation_params::generation_prompt`. This string is propagated into `common_chat_params::generation_prompt` and `common_chat_parser_params::generation_prompt`.
+
+The generation prompt is prepended to model output before PEG parsing via `wrap_for_generation_prompt()`. The portion *before* the reasoning start marker (if any) is prepended as a literal to ensure any boilerplate added by the template is consumed. The full string is also fed to the grammar sampler via `llama_sampler_accept` (stored in `common_params_sampling::grammar_prefill`), advancing the grammar past tokens already in the prompt. It is used to determine the reasoning budget sampler's initial state — COUNTING if the prefill tokens begin with the reasoning start sequence (but don't also contain the end sequence), IDLE otherwise.
+
+**`grammar_prefill`** (`common_params_sampling`): The generation prompt string tokenized and accepted by the grammar sampler at init time. Only applied when `grammar_external` is false (i.e., the grammar was not set explicitly by the user).
+
+Three outcomes for reasoning-prefill handling (in `generate_parser()`):
+
+1. **Start+end in generation prompt** (e.g. `<think></think>\n`): the parser sees reasoning as opened and immediately closed; whitespace-only reasoning content is discarded.
+2. **Only start in generation prompt** (e.g. `<think>\n`): the parser sees reasoning as already open.
+3. **Start marker present but not at the end** (e.g. Apriel's `<|begin_assistant|>` followed by boilerplate): the marker is a template artifact; the start literal is cleared so reasoning uses delimiter-style (end-only). For templates that ignore `add_generation_prompt` (empty diff), the rendered `data.prompt` is used as fallback — but only for non-TOOLS_ONLY modes, since in TOOLS_ONLY the start tag is model-generated and may appear in prior conversation turns.
+
+**`content_mode`**: How the template wraps assistant content.
+
+| Value                    | Description                                                    |
+|--------------------------|----------------------------------------------------------------|
+| `PLAIN`                  | No content markers                                             |
+| `ALWAYS_WRAPPED`         | Content always wrapped: `<response>...</response>`             |
+| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present                 |
+
+**`tool_format`**: Classification of tool call structure.
+
+| Value            | Description                                                      |
+|------------------|------------------------------------------------------------------|
+| `NONE`           | No tool support detected                                         |
+| `JSON_NATIVE`    | Pure JSON: `{"name": "X", "arguments": {...}}`                   |
+| `TAG_WITH_JSON`  | Tag-based with JSON args: `<function=X>{...}</function>`         |
+| `TAG_WITH_TAGGED`| Tag-based with tagged args: `<param=key>value</param>`           |
+
+**`call_id_position`**: Where call IDs appear in tag-based formats.
+
+| Value                    | Description                                  |
+|--------------------------|----------------------------------------------|
+| `NONE`                   | No call ID support detected                  |
+| `PRE_FUNC_NAME`          | Before function name                         |
+| `BETWEEN_FUNC_AND_ARGS`  | Between function name and arguments          |
+| `POST_ARGS`              | After arguments                              |
+
+## Tool Calling Formats
+
+### JSON_NATIVE
+
+**Structure**: The entire tool call (function name, arguments, values) is in JSON format. Optional enclosing tags around the section.
+
+**Detection**: Function name appears inside a JSON structure (quotes preceded by `{` or `:`).
+
+**Examples**:
+
+Standard OpenAI-style:
+
+```json
+<tool_call>
+{"name": "get_weather", "arguments": {"location": "Paris", "unit": "celsius"}}
+</tool_call>
+```
+
+Mistral Nemo with array wrapper:
+
+```json
+[TOOL_CALLS]
+[{"name": "calculate", "arguments": {"expr": "2+2"}}]
+```
+
+Function name as JSON key (Apertus style):
+
+```json
+{"get_weather": {"location": "Paris"}}
+```
+
+---
+
+### TAG_WITH_JSON
+
+**Structure**: Function name is outside JSON, in tag attributes or XML-style tags. Arguments are a JSON object.
+
+**Detection**: Function name not in JSON, but argument names appear in JSON context.
+
+**Examples**:
+
+Functionary v3.1:
+
+```xml
+<function=get_weather>{"location": "Paris", "unit": "celsius"}</function>
+```
+
+MiniMax:
+
+```xml
+<minimax:tool_call>
+<tool_name>calculate</tool_name>
+<arguments>{"expr": "2+2"}</arguments>
+</minimax:tool_call>
+```
+
+---
+
+### TAG_WITH_TAGGED
+
+**Structure**: Both function name and argument names are in XML-style tags. String values are unquoted; non-string values are JSON-formatted.
+
+**Detection**: Neither function name nor argument names appear in a JSON context.
+
+**Examples**:
+
+Qwen/Hermes XML format:
+
+```xml
+<function=get_weather>
+<param=location>Paris</param>
+<param=unit>celsius</param>
+</function>
+```
+
+Mixed types:
+
+```xml
+<function=calculate>
+<param=expr>2+2</param>
+<param=precision>2</param>
+<param=options>{"round": true}</param>
+</function>
+```
+
+String values (`Paris`, `celsius`, `2+2`) are unquoted; `options` (object type) is JSON-formatted.
+
+---
+
+## Analysis Flow
+
+```text
+autoparser::autoparser(tmpl)
+    |
+    |-- Phase 1: analyze_reasoning(tmpl, jinja_caps.supports_tool_calls)
+    |     |-- R1: compare_reasoning_presence()   — with/without reasoning_content field
+    |     |-- R2: compare_thinking_enabled()     — enable_thinking=false vs true
+    |     '-- R3: compare_reasoning_scope()      — reasoning+content vs reasoning+tools
+    |           (only if supports_tool_calls)
+    |
+    |-- Phase 2: analyze_content(tmpl, reasoning)
+    |     '-- C1: compares content-only vs tools output and content-only vs reasoning output
+    |
+    |-- Phase 3: analyze_tools(tmpl, jinja_caps, reasoning)
+    |     (skipped entirely if !jinja_caps.supports_tool_calls)
+    |     |
+    |     |-- T1: analyze_tool_calls()           — no tools vs with tools; classifies format
+    |     |         |-- JSON path → analyze_tool_call_format_json_native()
+    |     |         '-- tag path → analyze_tool_call_format_non_json()
+    |     |
+    |     (if format != NONE and format != JSON_NATIVE:)
+    |     |
+    |     |-- T2: check_per_call_markers()       — 1 call vs 2 calls; moves section→per-call if needed
+    |     |         (only if supports_parallel_tool_calls)
+    |     |
+    |     |-- T3: extract_function_markers()     — func_alpha vs func_beta; extracts name prefix/suffix/close
+    |     |
+    |     |-- T4: analyze_arguments()            — (TAG_WITH_TAGGED only)
+    |     |         |-- A1: extract_argument_name_markers()   — arg_name_A vs arg_name_B
+    |     |         '-- A2: extract_argument_value_markers()  — value "XXXX" vs "YYYY"
+    |     |
+    |     |-- T5: extract_argument_separator()   — 1 arg vs 2 args; finds separator between args
+    |     |
+    |     |-- T6: extract_args_markers()         — 0 args vs 1 arg; finds args container markers
+    |     |
+    |     '-- T7: extract_call_id_markers()      — call_id "call00001" vs "call99999"
+    |
+    '-- collect_preserved_tokens()               — union of all non-empty markers
+    |
+    '-- apply workarounds()                      — post-hoc patches for edge-case templates
+    |
+    v
+autoparser (analysis result)
+    |
+    v
+autoparser::peg_generator::generate_parser(tmpl, inputs, analysis)
+    |-- analysis.build_parser(inputs)            — builds PEG parser arena
+    |     |-- reasoning.build_parser(ctx)        — reasoning parser (mode-dependent)
+    |     |-- content.build_parser(ctx)          — content parser (mode-dependent)
+    |     '-- tools.build_parser(ctx)            — tool parser (dispatches by tool_format)
+    |           |-- build_tool_parser_json_native()
+    |           |-- build_tool_parser_tag_json()
+    |           '-- build_tool_parser_tag_tagged()
+    |
+    |-- Build GBNF grammar (if tools present and trigger_marker non-empty)
+    '-- Set grammar_triggers from section_start or per_call_start
+    |
+    v
+common_chat_params (prompt, parser, grammar, triggers, preserved_tokens)
+```
+
+## Entry Point
+
+The auto-parser is invoked in [common/chat.cpp:1280-1310](common/chat.cpp#L1280-L1310) in `common_chat_templates_apply_jinja`. A few specialized templates are handled first (Ministral/Magistral Large 3, GPT-OSS with `<|channel|>`, Functionary v3.2 with `>>>all`), then the auto-parser handles everything else via `autoparser::autoparser` + `peg_generator::generate_parser`.
+
+## Algorithm Details
+
+### Core Mechanism: Differential Comparison
+
+All analysis phases use the same factorized comparison function declared in [common/chat-auto-parser-helpers.h:68](common/chat-auto-parser-helpers.h#L68):
+
+```cpp
+compare_variants(tmpl, params_A, params_modifier)
+```
+
+This creates variant B by applying a modifier lambda to a copy of `params_A`, renders both through the template, and computes a `diff_split` ([common/chat-auto-parser.h:28-37](common/chat-auto-parser.h#L28-L37)):
+
+- `prefix` — common prefix between A and B
+- `suffix` — common suffix between A and B
+- `left` — unique to variant A
+- `right` — unique to variant B
+
+The diff is computed via `calculate_diff_split()`, which finds the longest-common-prefix and longest-common-suffix, then iteratively moves incomplete `<...>` or `[...]` markers from the prefix/suffix into left/right until stable (tag boundary fixing).
+
+Text is segmentized into markers and non-marker fragments using `segmentize_markers()`, which splits on `<...>` and `[...]` boundaries.
+
+### Phase 1: Reasoning Analysis
+
+**R1 — `compare_reasoning_presence()`**: Compares assistant message with vs without a `reasoning_content` field.
+
+- Searches `diff.right` (output with reasoning) for the reasoning content needle
+- Uses PEG parsers to find surrounding markers:
+  - If both pre/post markers found in `diff.right` → `TAG_BASED`
+  - If both found but post marker only in the full output B → `TAG_BASED` (template forces markers; handled via prefill)
+  - If only post marker found → `TAG_BASED` (delimiter-style, empty start)
+- Sets `reasoning.start` and `reasoning.end`
+
+**R2 — `compare_thinking_enabled()`**: Compares `enable_thinking=false` vs `true` with a generation prompt.
+
+- Detects template-added reasoning markers: `enable_thinking=true` appends a non-empty marker → sets `reasoning.start`, mode = `TAG_BASED`
+- Handles the reverse case (`enable_thinking=false` appends the marker instead): extracts both start (from the preceding segment) and end markers; mode = `TAG_BASED`
+- The reasoning prefill (markers added by the template) is later extracted in `common_chat_templates_apply_jinja` and prepended to model output before parsing
+
+**R3 — `compare_reasoning_scope()`**: Compares assistant message with reasoning+text-content vs reasoning+tool-calls.
+
+- Only runs if `jinja_caps.supports_tool_calls`
+- Detects `TOOLS_ONLY`: reasoning content present in B (with tools) but not in A (with text content)
+- Extracts reasoning markers from the tool call output using PEG parsers
+
+### Phase 2: Content Analysis
+
+**C1**: Two comparisons in the `analyze_content` constructor:
+
+- Comparison 1: content-only output vs tool-call output → `diff_tools`
+- Comparison 2: content-only output vs reasoning+empty-content output → `diff_reasoning`
+
+Classification logic:
+
+- `PLAIN`: `diff_tools.left` equals the response string (content is the entire diff, no wrapper)
+- `ALWAYS_WRAPPED`: markers found surrounding the content text in `pure_content` → extracts `start`/`end`
+
+### Phase 3: Tool Call Analysis
+
+**T1 — `analyze_tool_calls()`**: Compares no-tools vs with-tools output.
+
+- Extracts the tool call section as `diff.right`
+- Calls `analyze_tool_call_format()` which first strips reasoning markers from the haystack, then:
+  - Calls `in_json_haystack()` for both function name and argument name needles
+  - `in_json_haystack()` uses a PEG parser to check whether the needle appears in a JSON context (preceded by `{` or `:` with surrounding quotes)
+  - If function name is in JSON → `JSON_NATIVE` → `analyze_tool_call_format_json_native()`
+  - If function name not in JSON, arg name is in JSON → `TAG_WITH_JSON`
+  - If neither in JSON → `TAG_WITH_TAGGED`
+  - `analyze_tool_call_format_json_native()`: parses the JSON object, matches field values to needles to populate `name_field`, `args_field`, `id_field`, `gen_id_field`; detects `tools_array_wrapped`; extracts `section_start`/`section_end`
+  - `analyze_tool_call_format_non_json()`: uses PEG parsers on the haystack to find up to two opening markers (section + per-call) then up to two closing markers
+
+**T2 — `check_per_call_markers()`**: Compares 1 call vs 2 calls.
+
+- Computes a secondary diff of the second call portion vs the common suffix
+- If the second call content starts with `section_start` → the section marker is actually per-call → moves `section_start/end` to `per_call_start/end` and clears the section markers
+
+**T3 — `extract_function_markers()`**: Compares function name `FUN_FIRST` vs `FUN_SECOND` (two different named functions).
+
+- Finds where the function name appears in `diff.left`
+- Extracts `function.name_prefix` from the common prefix up to the function marker, and `function.name_suffix` from after the name up to the next marker
+- Extends `name_suffix` into `diff.suffix` (to the first marker for TAG_WITH_TAGGED; to the first `{` or `[` for TAG_WITH_JSON)
+- Extracts `function.close` from after the last argument value up to the per-call/section end marker
+
+**T4 — `analyze_arguments()`** (TAG_WITH_TAGGED only):
+
+- **A1 `extract_argument_name_markers()`**: Compares `arg_name_A` vs `arg_name_B` (two different argument names).
+  - Finds shared surrounding structure → `arguments.name_prefix`, `arguments.name_suffix`
+- **A2 `extract_argument_value_markers()`**: Compares argument value `"XXXX"` vs `"YYYY"` (same arg, different value).
+  - Finds markers surrounding the value → `arguments.value_prefix`, `arguments.value_suffix`
+
+**T5 — `extract_argument_separator()`**: Compares 1 argument vs 2 arguments (same function).
+
+- Uses `until_common_prefix(diff.right, ARG_FIRST, ARG_SECOND)` to find what separates the two argument blocks
+
+**T6 — `extract_args_markers()`**: Compares 0 arguments vs 1 argument.
+
+- Uses `until_common_prefix()` and `after_common_suffix()` with the empty and single-arg JSON strings as anchors to find container markers (`arguments.start`, `arguments.end`)
+
+**T7 — `extract_call_id_markers()`**: Compares call IDs `"call00001"` vs `"call99999"`.
+
+- Determines whether function name appears in `diff.prefix` or `diff.suffix` to classify position:
+  - Function name in prefix only → `BETWEEN_FUNC_AND_ARGS` or `POST_ARGS` (further distinguished by where `{` appears)
+  - Function name in suffix only → `PRE_FUNC_NAME`
+- Extracts `call_id.prefix` and `call_id.suffix` markers around the call ID value
+- Clears `per_call_end` if it incorrectly incorporated the call ID suffix
+
+### Workarounds
+
+A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches after analysis. Each workaround is a lambda that inspects the template source and overrides analysis results. Current workarounds:
+
+1. **Old Qwen/DeepSeek thinking templates** — source contains `content.split('</think>')` but not `<SPECIAL_12>`: sets `reasoning.mode = TAG_BASED` with `<think>`/`</think>` markers if no reasoning was detected
+2. **Granite 3.3** — source contains specific "Write your thoughts" text: forces `TAG_BASED` reasoning with `<think>`/`</think>` and `WRAPPED_WITH_REASONING` content with `<response>`/`</response>`
+3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
+4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
+5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
+
+### Parser Building
+
+Each analyzer struct (`analyze_reasoning`, `analyze_content`, `analyze_tools`) implements `build_parser(parser_build_context&)`. They share a `parser_build_context` that carries the PEG builder, inference inputs, the pre-built reasoning parser, and a pointer to the content analyzer.
+
+#### Reasoning Parser (`analyze_reasoning::build_parser`)
+
+| Mode                                          | Parser                                                                    |
+|-----------------------------------------------|---------------------------------------------------------------------------|
+| Not extracting reasoning                      | `eps()`                                                                   |
+| `TAG_BASED` or `TOOLS_ONLY` (non-empty start) | `optional(start + reasoning(until(end)) + end + space())`                 |
+| `TAG_BASED` or `TOOLS_ONLY` (empty start)     | `optional(reasoning(until(end)) + end + space())` — delimiter-style       |
+
+Note: The start marker may be empty either because the analyzer detected delimiter-style reasoning, or because `generate_parser()` cleared a template artifact start marker (see Generation Prompt & Reasoning Prefill above). Whitespace-only reasoning content (e.g. from a `<think></think>` prefill) is discarded by the mapper.
+
+#### Content Parser (`analyze_content::build_parser`)
+
+| Condition                              | Parser                                                                          |
+|----------------------------------------|---------------------------------------------------------------------------------|
+| `json_schema` present                  | `reasoning + space() + content(schema(json(), "response-format", ...)) + end()` |
+| Tools present                          | Dispatches to `analyze_tools::build_parser()`                                   |
+| `ALWAYS_WRAPPED` with reasoning        | `reasoning + start + content(until(end)) + end + end()`                         |
+| `ALWAYS_WRAPPED` without reasoning     | `content(until(start)) + start + content(until(end)) + end + end()`             |
+| Default (PLAIN)                        | `reasoning + content(rest()) + end()`                                           |
+
+#### Tool Parsers (`analyze_tools::build_parser`)
+
+Dispatches by `format.mode`:
+
+**`build_tool_parser_json_native()`**: Calls `p.standard_json_tools()` which internally dispatches to:
+
+- `build_json_tools_function_is_key()` — function name is the JSON key: `{"get_weather": {...}}`
+- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
+- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
+
+Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
+
+**`build_tool_parser_tag_json()`**: For each tool function:
+
+```text
+tool_open(name_prefix + tool_name(literal(name)) + name_suffix) +
+    call_id_section +
+    tool_args(schema(json(), tool_schema))
+  [+ function.close if non-empty]
+```
+
+Wrapped in per-call markers (with optional parallel call repetition) then optionally in section markers.
+
+**`build_tool_parser_tag_tagged()`**: For each tool function, builds one parser per argument:
+
+- String types: `tool_arg_string_value(schema(until(value_suffix), ...))`
+- JSON types: `tool_arg_json_value(schema(json(), ...))`
+- Required args are plain; optional args wrapped in `optional()`
+- Arguments joined with `space()` between consecutive parsers
+
+For closing: uses `function.close` if present; otherwise uses `peek(per_call_end)` to avoid premature close during partial streaming; falls back to `tool_close(space())` to trigger mapper callbacks.
+
+All three tool parsers return:
+
+```text
+reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
+```
+
+Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker).
+
+## Mapper
+
+`common_chat_peg_mapper` maps PEG parse results (AST nodes) into `common_chat_msg` structures. Key design:
+
+- **Buffered arguments**: Before `tool_name` is known, argument text goes to `args_buffer`; once the name is set, the buffer is flushed to `current_tool->arguments`
+- **`args_target()`**: Returns a reference to whichever destination is currently active (buffer or tool args), eliminating branching
+- **`closing_quote_pending`**: Tracks whether a closing `"` needs to be appended when a string argument value is finalized (for schema-declared string types in tagged format)
+- **Whitespace-only reasoning**: Reasoning content that consists entirely of whitespace (e.g. from a `<think></think>` prefill) is cleared so the message shows no reasoning
+- **Brace auto-closing**: At tool close, unclosed `{` braces are closed automatically
+
+## Files
+
+| File                                      | Purpose                                                                         |
+|-------------------------------------------|---------------------------------------------------------------------------------|
+| `common/chat-auto-parser.h`               | All analysis structs, enums, `autoparser`, `peg_generator`, `generation_params` |
+| `common/chat-auto-parser-generator.cpp`   | Parser generator: `generate_parser()` and `build_parser()` methods              |
+| `common/chat-diff-analyzer.cpp`           | Differential analysis implementation and workarounds                            |
+| `common/chat-auto-parser-helpers.h/cpp`   | `calculate_diff_split()`, `segmentize_markers()`, `compare_variants()`,         |
+|                                           | `wrap_for_generation_prompt()`, string helpers                                  |
+| `common/chat-peg-parser.h/cpp`            | `common_chat_peg_builder`, `common_chat_peg_mapper`, and helpers                |
+| `common/chat.cpp`                         | Entry point: `common_chat_templates_apply_jinja()`                              |
+| `tools/parser/debug-template-parser.cpp`  | Debug tool for template analysis                                                |
+| `tools/parser/template-analysis.cpp`      | Template analysis tool                                                          |
+
+## Testing & Debugging
+
+### Debug Tools
+
+**Template Debugger**: `tools/parser/debug-template-parser.cpp`
+
+- Usage: `./bin/llama-debug-template-parser path/to/template.jinja`
+- Shows detected format, markers, generated parser, and GBNF grammar
+
+**Template Analysis**: `tools/parser/template-analysis.cpp`
+
+- Usage: `./bin/llama-template-analysis path/to/template.jinja`
+
+**Debug Logging**: Enable with `LLAMA_LOG_VERBOSITY=2`
+
+- Shows detailed analysis steps, pattern extraction results, and generated parser structure
+
+**PEG Test Builder**: Fluent API for creating test cases — see [tests/test-chat.cpp:947-1043](tests/test-chat.cpp#L947-L1043). Example usage:
+
+```cpp
+auto tst = peg_tester("models/templates/Template.jinja");
+tst.test("input text")
+   .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+   .tools({tool_json})
+   .parallel_tool_calls(true)
+   .enable_thinking(true)
+   .expect(expected_message)
+   .run();
+```
+
+### Tested Templates
+
+The following templates have active tests in `tests/test-chat.cpp`:
+
+| Template | Format | Notes |
+| -------- | ------ | ----- |
+| Ministral-3-14B-Reasoning | Reasoning | `[THINK]...[/THINK]` tags (specialized handler) |
+| NVIDIA-Nemotron-3-Nano-30B | TAG_WITH_TAGGED | Reasoning + tools |
+| CohereForAI Command-R7B | JSON_NATIVE | `<\|START_THINKING\|>`/`<\|START_RESPONSE\|>` markers |
+| Google Gemma 2 2B | Content only | No tool support |
+| Qwen-QwQ-32B | Reasoning | Forced-open thinking |
+| NousResearch Hermes 2 Pro | JSON_NATIVE | `<tool_call>` wrapper |
+| IBM Granite 3.3 | JSON_NATIVE | `<think></think>` + `<response></response>` |
+| ByteDance Seed-OSS | TAG_WITH_TAGGED | Custom `<seed:think>` and `<seed:tool_call>` tags |
+| Qwen3-Coder | TAG_WITH_TAGGED | XML-style tool format |
+| DeepSeek V3.1 | JSON_NATIVE | Forced thinking mode |
+| GLM-4.6 | TAG_WITH_TAGGED | `<tool_call>name\n<arg_key>...<arg_value>...` format |
+| GLM-4.7-Flash | TAG_WITH_TAGGED | Updated GLM format |
+| Kimi-K2-Thinking | JSON_NATIVE | Reasoning + JSON tools |
+| Apertus-8B-Instruct | JSON_NATIVE | Function name as JSON key |
+| MiniMax-M2 | TAG_WITH_JSON | XML invoke with JSON args |
+| NVIDIA-Nemotron-Nano-v2 | JSON_NATIVE | `<TOOLCALL>` wrapper (nested) |
+| CohereForAI Command-R Plus | JSON_NATIVE | Markdown code block format |
+| Mistral-Nemo-Instruct-2407 | JSON_NATIVE | `[TOOL_CALLS]` wrapper with ID field |
+| Functionary v3.1 | TAG_WITH_JSON | `<function=X>` format |
+| Functionary v3.2 | Specialized | `>>>` recipient delimiter (dedicated handler) |
+| Fireworks Firefunction v2 | TAG_WITH_JSON | Fireworks tool format |
+| DeepSeek R1 Distill (Llama/Qwen) | Reasoning | Forced-open thinking |
+| llama-cpp-deepseek-r1 | Reasoning | Forced-open thinking |
+| Kimi-K2 / Kimi-K2-Instruct | JSON_NATIVE | JSON tools with special markers |
+| Llama 3.1/3.2/3.3 | JSON_NATIVE | Standard Llama tool format |
+| OpenAI GPT-OSS | Specialized | Channel-based (dedicated handler) |
+| Apriel 1.5 | JSON_NATIVE | `<tool_calls>` wrapper with JSON array |
+| Apriel 1.6 Thinker | Reasoning | Implicit reasoning start |
+| Mistral Small 3.2 | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` with call ID |
+| Devstral | JSON_NATIVE | `[TOOL_CALLS]func[ARGS]{...}` without call ID |
+| StepFun 3.5 Flash | TAG_WITH_TAGGED | `<function=X><parameter=Y>` format |
+
+## Adding Support for New Templates
+
+To support a new template format:
+
+1. **If it follows standard patterns** — The auto-parser should detect it automatically. Run `llama-debug-template-parser` to verify markers are correctly extracted.
+2. **If differential analysis extracts incorrect markers** — Add a workaround lambda to the `workarounds` vector in `common/chat-diff-analyzer.cpp`. Inspect the template source for a unique identifying substring.
+3. **If it needs fundamentally different handling** — Add a dedicated handler function in `chat.cpp` before the auto-parser block (as done for GPT-OSS, Functionary v3.2, and Ministral).
+
+## Edge Cases and Quirks
+
+1. **Generation Prompt & Reasoning Prefill**: The generation prompt is extracted by diffing `add_generation_prompt=false` vs `true` in `common_chat_templates_apply_jinja`, so it contains exactly what the template appends — avoiding false positives from prior conversation turns.
+2. **Per-Call vs Per-Section Markers**: Some templates wrap each tool call individually (`per_call_start/end`); others wrap the entire section (`section_start/end`). T2 (`check_per_call_markers()`) disambiguates by checking if the second call in a two-call output starts with the section marker.
+3. **Tag Boundary Fixing**: `calculate_diff_split()` iteratively adjusts prefix/suffix boundaries to avoid splitting `<tag>` or `[marker]` tokens, ensuring clean extraction.
+4. **Call ID Side Effects**: When a call ID is detected, `per_call_end` may have been incorrectly set to include the call ID suffix. T7 clears `per_call_end` in this case.
+5. **Tool Analysis Gating**: `analyze_tools` is only constructed (and all tool analysis phases run) when `jinja_caps.supports_tool_calls` is true. Within tool analysis, `check_per_call_markers()` (T2) only runs if `jinja_caps.supports_parallel_tool_calls`.
+6. **`analyze_arguments()` Gating**: Within tool analysis, A1 and A2 (argument name/value marker extraction) only run for `TAG_WITH_TAGGED` format. `extract_argument_separator()` and `extract_args_markers()` run for all non-`JSON_NATIVE` formats.
+7. **Undetected Tool Format**: If `analyze_tools` concludes tool calling is supported but cannot determine the format, `build_parser()` logs an error and returns `eps()` (graceful degradation) rather than aborting.
diff --git a/docs/development/parsing.md b/docs/development/parsing.md
index 113ab2e2ee..d461b12f6b 100644
--- a/docs/development/parsing.md
+++ b/docs/development/parsing.md
@@ -22,7 +22,7 @@ Below is a contrived example demonstrating how to use the PEG parser to parse
 output from a model that emits arguments as JSON.
 
 ```cpp
-auto parser = build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
     // Build a choice of all available tools
     auto tool_choice = p.choice();
     for (const auto & tool : tools) {
@@ -212,7 +212,7 @@ mapper.from_ast(ctx.ast, result);
 
 ### Native
 
-The `common_chat_peg_native_builder` builds a `native` parser suitable for
+The `common_chat_peg_builder` builds a `native` parser suitable for
 models that emit tool arguments as a direct JSON object.
 
 - **`reasoning(p)`** - Tag node for `reasoning_content`
@@ -225,7 +225,7 @@ models that emit tool arguments as a direct JSON object.
 - **`tool_args(p)`** - Tag the tool arguments
 
 ```cpp
-build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
     auto get_weather_tool = p.tool(p.sequence({
         p.tool_open(p.literal("{")),
         p.json_member("name", "\"" + p.tool_name(p.literal("get_weather")) + "\""),
@@ -246,7 +246,7 @@ build_chat_peg_native_parser([&](common_chat_peg_native_parser & p) {
 
 ### Constructed
 
-The `common_chat_peg_constructed_builder` builds a `constructed` parser
+The `common_chat_peg_builder` builds a `constructed` parser
 suitable for models that emit tool arguments as separate entities, such as XML
 tags.
 
@@ -264,7 +264,7 @@ tags.
 - **`tool_arg_json_value(p)`** - Tag JSON value for the argument
 
 ```cpp
-build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+build_chat_peg_parser([&](common_chat_peg_builder & p) {
     auto location_arg = p.tool_arg(
         p.tool_arg_open("<parameter name=\"" + p.tool_arg_name(p.literal("location")) + "\">"),
         p.tool_arg_string_value(p.until("</parameter>")),
diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py
index 886dd3d81e..e62bbaf8c6 100755
--- a/examples/json_schema_to_grammar.py
+++ b/examples/json_schema_to_grammar.py
@@ -633,7 +633,7 @@ def add_component(comp_schema, is_required):
             return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None))
 
         elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
-            items = schema.get('items') or schema['prefixItems']
+            items = schema.get('items', schema.get('prefixItems'))
             if isinstance(items, list):
                 return self._add_rule(
                     rule_name,
@@ -689,6 +689,11 @@ def add_component(comp_schema, is_required):
         elif (schema_type == 'object') or (len(schema) == 0):
             return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
 
+        elif schema_type is None and isinstance(schema, dict):
+            # No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+            # Per JSON Schema semantics this is equivalent to {} and accepts any value.
+            return self._add_rule(rule_name, self._add_primitive('value', PRIMITIVE_RULES['value']))
+
         else:
             assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
             # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
diff --git a/examples/parser/CMakeLists.txt b/examples/parser/CMakeLists.txt
new file mode 100644
index 0000000000..55e0c63437
--- /dev/null
+++ b/examples/parser/CMakeLists.txt
@@ -0,0 +1,20 @@
+if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
+    # this tool is disabled on Windows when building with shared libraries because it uses internal functions not exported with LLAMA_API
+    set(TARGET llama-debug-template-parser)
+    add_executable(${TARGET} debug-template-parser.cpp)
+    target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+    target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+    if(LLAMA_TOOLS_INSTALL)
+        install(TARGETS ${TARGET} RUNTIME)
+    endif()
+endif()
+
+set(TARGET llama-template-analysis)
+add_executable(${TARGET} template-analysis.cpp)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
+
+if(LLAMA_TOOLS_INSTALL)
+    install(TARGETS ${TARGET} RUNTIME)
+endif()
diff --git a/examples/parser/debug-template-parser.cpp b/examples/parser/debug-template-parser.cpp
new file mode 100644
index 0000000000..9c591a1f11
--- /dev/null
+++ b/examples/parser/debug-template-parser.cpp
@@ -0,0 +1,462 @@
+#include "../src/llama-grammar.h"
+#include "chat-auto-parser.h"
+#include "chat.h"
+#include "common.h"
+#include "gguf.h"
+#include "jinja/runtime.h"
+#include "log.h"
+#include "nlohmann/json.hpp"
+#include "peg-parser.h"
+
+#include <fstream>
+#include <numeric>
+#include <optional>
+#include <sstream>
+#include <string>
+
+using json = nlohmann::ordered_json;
+
+enum class output_mode {
+    ANALYSIS,  // Only output analysis results (default)
+    TEMPLATE,  // Only output rendered template
+    BOTH       // Output both
+};
+
+enum class input_message_type {
+    NONE,                    // Don't render any message scenarios (only analysis)
+    CONTENT_ONLY,            // Simple assistant message with content
+    REASONING_CONTENT,       // Message with reasoning_content + content
+    TOOL_CALL_ONLY,          // Message with tool_calls only
+    CONTENT_TOOL_CALL,       // Message with content + tool_calls
+    REASONING_TOOL_CALL,     // Message with reasoning_content + tool_calls
+    CONTENT_FAKE_TOOL_CALL,  // Message with content but no actual tool_calls (for testing)
+    ALL                      // Render all scenarios
+};
+
+struct debug_options {
+    std::string        template_path;
+    bool               with_tools        = true;
+    bool               generation_prompt = true;
+    bool               enable_reasoning  = true;
+    bool               debug_jinja       = false;
+    bool               force_tool_call   = false;
+    output_mode        mode              = output_mode::BOTH;
+    input_message_type input_message     = input_message_type::NONE;
+};
+
+static std::string read_file(const std::string & path) {
+    std::ifstream fin(path, std::ios::binary);
+    if (!fin.is_open()) {
+        throw std::runtime_error("Could not open file: " + path);
+    }
+    std::ostringstream buf;
+    buf << fin.rdbuf();
+    return buf.str();
+}
+
+static std::string read_gguf_chat_template(const std::string & path) {
+    struct gguf_init_params params = { /*no_alloc =*/true,  // We only need metadata, not tensor data
+                                       /*ctx=*/nullptr };
+
+    struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+    if (ctx == nullptr) {
+        throw std::runtime_error("Could not open GGUF file: " + path);
+    }
+
+    const char * key    = "tokenizer.chat_template";
+    int64_t      key_id = gguf_find_key(ctx, key);
+
+    if (key_id == -1) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file does not contain chat template key: " + std::string(key));
+    }
+
+    const char * template_str = gguf_get_val_str(ctx, key_id);
+    if (template_str == nullptr) {
+        gguf_free(ctx);
+        throw std::runtime_error("GGUF file contains chat template key but value is null");
+    }
+
+    std::string result = template_str;
+    gguf_free(ctx);
+    return result;
+}
+
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s <template_or_gguf_path> [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --no-tools              Disable tool definitions\n");
+    LOG_ERR("  --force-tool-call       Set tool calls to forced\n");
+    LOG_ERR("  --generation-prompt=0|1 Set add_generation_prompt (default: 1)\n");
+    LOG_ERR("  --enable-reasoning=0|1  Enable reasoning parsing (default: 1)\n");
+    LOG_ERR("  --output=MODE           Output mode: analysis, template, both (default: both)\n");
+    LOG_ERR("  --debug-jinja           Enable Jinja fine-grained debug\n");
+    LOG_ERR("  --input-message=TYPE    Message type to render:\n");
+    LOG_ERR("                          content_only, reasoning_content, tool_call_only,\n");
+    LOG_ERR("                          content_tool_call, reasoning_tool_call,\n");
+    LOG_ERR("                          content_fake_tool_call, all\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s template.jinja --input-message=all --generation-prompt=1\n", program_name);
+    LOG_ERR("  %s template.jinja --output=template --input-message=tool_call_only\n", program_name);
+}
+
+static bool parse_bool_option(const std::string & value) {
+    return value == "1" || value == "true" || value == "yes";
+}
+
+static bool parse_options(int argc, char ** argv, debug_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    opts.template_path = argv[1];
+
+    for (int i = 2; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--force-tool-call") {
+            opts.force_tool_call = true;
+        } else if (arg == "--debug-jinja") {
+            opts.debug_jinja = true;
+        } else if (arg == "--no-tools") {
+            opts.with_tools = false;
+        } else if (arg.rfind("--generation-prompt=", 0) == 0) {
+            opts.generation_prompt = parse_bool_option(arg.substr(20));
+        } else if (arg.rfind("--enable-reasoning=", 0) == 0) {
+            opts.enable_reasoning = parse_bool_option(arg.substr(19));
+        } else if (arg.rfind("--output=", 0) == 0) {
+            std::string mode = arg.substr(9);
+            if (mode == "analysis") {
+                opts.mode = output_mode::ANALYSIS;
+            } else if (mode == "template") {
+                opts.mode = output_mode::TEMPLATE;
+            } else if (mode == "both") {
+                opts.mode = output_mode::BOTH;
+            } else {
+                LOG_ERR("Unknown output mode: %s\n", mode.c_str());
+                return false;
+            }
+        } else if (arg.rfind("--input-message=", 0) == 0) {
+            std::string type = arg.substr(16);
+            if (type == "content_only") {
+                opts.input_message = input_message_type::CONTENT_ONLY;
+            } else if (type == "reasoning_content") {
+                opts.input_message = input_message_type::REASONING_CONTENT;
+            } else if (type == "tool_call_only") {
+                opts.input_message = input_message_type::TOOL_CALL_ONLY;
+            } else if (type == "content_tool_call") {
+                opts.input_message = input_message_type::CONTENT_TOOL_CALL;
+            } else if (type == "reasoning_tool_call") {
+                opts.input_message = input_message_type::REASONING_TOOL_CALL;
+            } else if (type == "content_fake_tool_call") {
+                opts.input_message = input_message_type::CONTENT_FAKE_TOOL_CALL;
+            } else if (type == "all") {
+                opts.input_message = input_message_type::ALL;
+            } else {
+                LOG_ERR("Unknown input message type: %s\n", type.c_str());
+                return false;
+            }
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static json build_user_message() {
+    return json{
+        { "role",    "user"                               },
+        { "content", "Hello, please help me with a task." }
+    };
+}
+
+static json build_content_only_message() {
+    return json{
+        { "role",    "assistant"                                   },
+        { "content", "Hello! I'm here to help you with your task." }
+    };
+}
+
+static json build_reasoning_content_message() {
+    return json{
+        { "role",              "assistant"                                                               },
+        { "content",           "Hello! I'm here to help you with your task."                             },
+        { "reasoning_content", "The user is greeting me and asking for help. I should respond politely." }
+    };
+}
+
+static json build_tool_call_only_message() {
+    return json{
+        { "role",       "assistant"      },
+        { "content",    nullptr          },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function", json{ { "name", "test_function_name" },
+                                  { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } },
+              { "id", "123456789" } } }) }
+    };
+}
+
+static json build_content_tool_call_message() {
+    return json{
+        { "role",       "assistant"                                                                              },
+        { "content",    "I'll help you by calling a function."                                                   },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_reasoning_tool_call_message() {
+    return json{
+        { "role",              "assistant"                                                                       },
+        { "content",           nullptr                                                                           },
+        { "reasoning_content", "I need to call a function to help with this task."                               },
+        { "tool_calls",
+         json::array({ json{
+              { "type", "function" },
+              { "function",
+                json{ { "name", "test_function_name" },
+                      { "arguments", json::object({ { "param1", "value1" }, { "param2", "value2" } }) } } } } }) }
+    };
+}
+
+static json build_content_fake_tool_call_message() {
+    // This message has content but NO tool_calls field
+    // It's used to test if a template renders tool definitions but not tool calls
+    return json{
+        { "role",    "assistant"                            },
+        { "content", "I'll help you by calling a function." }
+    };
+}
+
+static json build_tools_definition() {
+    json parameters_schema                    = json::object();
+    parameters_schema["type"]                 = "object";
+    parameters_schema["properties"]           = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        { "type",        "string"          },
+        { "description", "First parameter" }
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        { "type",        "string"           },
+        { "description", "Second parameter" }
+    });
+    parameters_schema["required"]             = json::array({ "param1" });
+
+    return json::array({
+        json{ { "type", "function" },
+             { "function", json{ { "name", "test_function_name" },
+                                  { "description", "A test function for debugging" },
+                                  { "parameters", parameters_schema } } } }
+    });
+}
+
+static void render_scenario(const common_chat_template & tmpl,
+                            const std::string &          scenario_name,
+                            const json &                 messages,
+                            const json &                 tools,
+                            bool                         add_generation_prompt,
+                            bool                         enable_thinking) {
+    LOG_ERR("\n=== Scenario: %s ===\n", scenario_name.c_str());
+    LOG_ERR("add_generation_prompt: %s, enable_thinking: %s\n", add_generation_prompt ? "true" : "false",
+            enable_thinking ? "true" : "false");
+
+    // When add_generation_prompt is true, add a trailing user message to trigger the prompt
+    json final_messages = messages;
+    if (add_generation_prompt && !messages.empty() && messages.back().value("role", "") == "assistant") {
+        final_messages.push_back(json{
+            { "role",    "user"                                       },
+            { "content", "Now please continue with another response." }
+        });
+    }
+
+    LOG_ERR("Messages:\n%s\n", final_messages.dump(2).c_str());
+
+    try {
+        autoparser::generation_params inputs;
+        inputs.messages                         = final_messages;
+        inputs.add_generation_prompt            = add_generation_prompt;
+        inputs.extra_context["enable_thinking"] = enable_thinking;
+
+        if (!tools.is_null() && tools.is_array() && !tools.empty()) {
+            inputs.tools = tools;
+        }
+
+        std::string output = common_chat_template_direct_apply(tmpl, inputs);
+
+        LOG_ERR("\n--- Rendered Output ---\n");
+        LOG_ERR("%s\n", output.c_str());
+        LOG_ERR("--- End Output (length: %zu) ---\n", output.length());
+    } catch (const std::exception & e) {
+        LOG_ERR("Rendering failed: %s\n", e.what());
+    }
+}
+
+static void render_all_scenarios(const common_chat_template & tmpl,
+                                 const json &                 tools,
+                                 bool                         add_generation_prompt,
+                                 bool                         enable_thinking,
+                                 input_message_type           message_type) {
+    json user_msg = build_user_message();
+
+    auto render_if = [&](input_message_type type, const std::string & name, const json & assistant_msg) {
+        if (message_type == input_message_type::ALL || message_type == type) {
+            json messages = json::array({ user_msg, assistant_msg });
+            render_scenario(tmpl, name, messages, tools, add_generation_prompt, enable_thinking);
+        }
+    };
+
+    render_if(input_message_type::CONTENT_ONLY, "content_only", build_content_only_message());
+    render_if(input_message_type::REASONING_CONTENT, "reasoning_content", build_reasoning_content_message());
+    render_if(input_message_type::TOOL_CALL_ONLY, "tool_call_only", build_tool_call_only_message());
+    render_if(input_message_type::CONTENT_TOOL_CALL, "content_tool_call", build_content_tool_call_message());
+    render_if(input_message_type::REASONING_TOOL_CALL, "reasoning_tool_call", build_reasoning_tool_call_message());
+    render_if(input_message_type::CONTENT_FAKE_TOOL_CALL, "content_fake_tool_call",
+              build_content_fake_tool_call_message());
+
+    // Also render with add_generation_prompt=true to show the prompt ending
+    if (message_type == input_message_type::ALL) {
+        LOG_ERR("\n\n=== Generation Prompt Scenarios (add_generation_prompt=true) ===\n");
+
+        json prompt_messages = json::array({ user_msg });
+        render_scenario(tmpl, "generation_prompt_only", prompt_messages, tools, true, enable_thinking);
+
+        // With enable_thinking toggled
+        render_scenario(tmpl, "generation_prompt_thinking_disabled", prompt_messages, tools, true, false);
+    }
+}
+
+static autoparser::generation_params prepare_params(const debug_options & opts, const json & tools) {
+    autoparser::generation_params params;
+    params.messages         = json::array({ build_user_message() });
+    params.reasoning_format = opts.enable_reasoning ? COMMON_REASONING_FORMAT_DEEPSEEK : COMMON_REASONING_FORMAT_NONE;
+    params.enable_thinking  = opts.enable_reasoning;
+    params.add_generation_prompt = opts.generation_prompt;
+
+    if (opts.with_tools) {
+        params.tools       = tools;
+        params.tool_choice = opts.force_tool_call ? COMMON_CHAT_TOOL_CHOICE_REQUIRED : COMMON_CHAT_TOOL_CHOICE_AUTO;
+    } else {
+        params.tools       = json();
+        params.tool_choice = COMMON_CHAT_TOOL_CHOICE_NONE;
+    }
+    params.parallel_tool_calls = false;
+    return params;
+}
+
+int main(int argc, char ** argv) {
+    // Set log level to most verbose to capture all debug output
+    common_log_set_verbosity_thold(99);
+
+    debug_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    if (opts.debug_jinja || std::getenv("LLAMA_DEBUG_JINJA") != nullptr) {
+        jinja::enable_debug(true);
+    }
+
+    std::string template_source;
+    try {
+        // Check if the file is a GGUF file
+        if (opts.template_path.size() >= 5 &&
+            opts.template_path.compare(opts.template_path.size() - 5, 5, ".gguf") == 0) {
+            template_source = read_gguf_chat_template(opts.template_path);
+        } else {
+            template_source = read_file(opts.template_path);
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return 1;
+    }
+
+    LOG_ERR("Analyzing template: %s\n", opts.template_path.c_str());
+    LOG_ERR("Options: with_tools=%s, generation_prompt=%s, enable_reasoning=%s\n", opts.with_tools ? "true" : "false",
+            opts.generation_prompt ? "true" : "false", opts.enable_reasoning ? "true" : "false");
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+
+        json tools = opts.with_tools ? build_tools_definition() : json();
+
+        autoparser::generation_params params = prepare_params(opts, tools);
+        common_chat_params            parser_data;
+        if (std::optional<common_chat_params> spec_tmpl =
+                common_chat_try_specialized_template(chat_template, template_source, params)) {
+            LOG_ERR("\n");
+            LOG_ERR("This template uses a specialized parser, analysis results will not be available.");
+            parser_data = *spec_tmpl;
+        } else {
+            // Render template scenarios if requested
+            if (opts.input_message != input_message_type::NONE &&
+                (opts.mode == output_mode::TEMPLATE || opts.mode == output_mode::BOTH)) {
+                LOG_ERR("\n");
+                LOG_ERR("================================================================================\n");
+                LOG_ERR("                         TEMPLATE RENDERING OUTPUT\n");
+                LOG_ERR("================================================================================\n");
+
+                render_all_scenarios(chat_template, tools, opts.generation_prompt, opts.enable_reasoning,
+                                     opts.input_message);
+            }
+
+            // Output analysis if requested
+            if (opts.mode == output_mode::ANALYSIS || opts.mode == output_mode::BOTH) {
+                LOG_ERR("\n");
+                LOG_ERR("================================================================================\n");
+                LOG_ERR("                           TEMPLATE ANALYSIS\n");
+                LOG_ERR("================================================================================\n");
+
+                autoparser::autoparser analysis;
+                analysis.analyze_template(chat_template);
+
+                // Generate Parser
+                parser_data = autoparser::peg_generator::generate_parser(chat_template, params, analysis);
+            }
+
+            LOG_ERR("\n=== Generated Parser ===\n");
+            common_peg_arena arena;
+            arena.load(parser_data.parser);
+            LOG_ERR("%s\n", arena.dump(arena.root()).c_str());
+
+            LOG_ERR("\n=== Generated Grammar ===\n");
+            LOG_ERR("%s\n", parser_data.grammar.c_str());
+
+            LOG_ERR("\n=== Generated Lazy Grammar ===\n");
+            LOG_ERR("%d\n", parser_data.grammar_lazy);
+
+            LOG_ERR("\n=== Generated Grammar Triggers ===\n");
+            for (const common_grammar_trigger & cgt : parser_data.grammar_triggers) {
+                LOG_ERR("Token: %d | Type: %d | Value: %s\n", cgt.token, cgt.type, cgt.value.c_str());
+            }
+
+            LOG_ERR("\n=== Preserved Tokens ===\n");
+            for (const std::string & token : parser_data.preserved_tokens) {
+                LOG_ERR("  '%s'\n", token.c_str());
+            }
+
+            if (!parser_data.grammar.empty()) {
+                LOG_ERR("\n=== Verifying created grammar ===\n");
+                auto * grammar = llama_grammar_init_impl(nullptr, parser_data.grammar.c_str(), "root",
+                                                         parser_data.grammar_lazy, nullptr, 0, nullptr, 0);
+                if (grammar != nullptr) {
+                    LOG_ERR("\n=== Grammar successfully created ===\n");
+                }
+            }
+        }
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/examples/parser/template-analysis.cpp b/examples/parser/template-analysis.cpp
new file mode 100644
index 0000000000..bf898a2290
--- /dev/null
+++ b/examples/parser/template-analysis.cpp
@@ -0,0 +1,611 @@
+#include "chat-auto-parser.h"
+#include "chat-auto-parser-helpers.h"
+#include "chat.h"
+#include "log.h"
+#include "jinja/caps.h"
+#include "jinja/runtime.h"
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "nlohmann/json.hpp"
+
+using json = nlohmann::ordered_json;
+
+// ANSI color codes - using 256-color palette for brighter colors (all bold)
+#define ANSI_RESET       "\033[0m"
+#define ANSI_PURPLE      "\033[1m\x1b[38;5;126m"  // Bold bright purple for main headers
+#define ANSI_CYAN        "\033[1m\x1b[38;5;81m"   // Bold bright cyan for section headers
+#define ANSI_BLUE        "\033[1m\x1b[38;5;12m"   // Bold bright blue for labels
+#define ANSI_ORANGE      "\033[1m\x1b[38;5;209m"  // Bold orange for right differences
+#define ANSI_GREEN       "\033[1m\x1b[38;5;83m"   // Bold bright green for left differences
+#define ANSI_GRAY        "\033[1m\x1b[38;5;240m"  // Bold gray (used for "no variables" message)
+#define ANSI_BOLD        "\033[1m"                // Standalone bold
+#define ANSI_PREFIX      "\033[1m\x1b[38;5;176m"  // Bold color for common prefix
+#define ANSI_SUFFIX      "\033[1m\x1b[38;5;61m"   // Bold color for common suffix
+
+// All template paths extracted from tests/test-chat.cpp
+static const std::vector<std::string> ALL_TEMPLATE_PATHS = {
+    "models/templates/Apertus-8B-Instruct.jinja",
+    "models/templates/Apriel-1.6-15b-Thinker-fixed.jinja",
+    "models/templates/ByteDance-Seed-OSS.jinja",
+    "models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja",
+    "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja",
+    "models/templates/GLM-4.6.jinja",
+    "models/templates/GLM-4.7-Flash.jinja",
+    "models/templates/Kimi-K2-Instruct.jinja",
+    "models/templates/Kimi-K2-Thinking.jinja",
+    "models/templates/MiMo-VL.jinja",
+    "models/templates/MiniMax-M2.jinja",
+    "models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja",
+    "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja",
+    "models/templates/NVIDIA-Nemotron-Nano-v2.jinja",
+    "models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja",
+    "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+    "models/templates/Qwen-QwQ-32B.jinja",
+    "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja",
+    "models/templates/Qwen3-Coder.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja",
+    "models/templates/deepseek-ai-DeepSeek-V3.1.jinja",
+    "models/templates/fireworks-ai-llama-3-firefunction-v2.jinja",
+    "models/templates/google-gemma-2-2b-it.jinja",
+    "models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja",
+    "models/templates/llama-cpp-deepseek-r1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.1.jinja",
+    "models/templates/meetkai-functionary-medium-v3.2.jinja",
+    "models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja",
+    "models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja",
+    "models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja",
+    "models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja",
+    "models/templates/moonshotai-Kimi-K2.jinja",
+    "models/templates/openai-gpt-oss-120b.jinja",
+    "models/templates/unsloth-Apriel-1.5.jinja",
+    "models/templates/unsloth-mistral-Devstral-Small-2507.jinja",
+};
+
+struct analysis_options {
+    std::vector<std::string> template_paths;
+    bool                     analyze_all = false;
+};
+
+static std::string read_file(const std::string & path) {
+    std::ifstream fin(path, std::ios::binary);
+    if (!fin.is_open()) {
+        throw std::runtime_error("Could not open file: " + path);
+    }
+    std::ostringstream buf;
+    buf << fin.rdbuf();
+    return buf.str();
+}
+
+static void print_usage(const char * program_name) {
+    LOG_ERR("Usage: %s [options]\n", program_name);
+    LOG_ERR("\nOptions:\n");
+    LOG_ERR("  --template <name>       Analyze specific template from test suite (e.g., 'deepseek' or 'DeepSeek-V3.1')\n");
+    LOG_ERR("  --template-file <path>  Analyze custom template file\n");
+    LOG_ERR("  --all                   Analyze all templates from test suite\n");
+    LOG_ERR("\nExamples:\n");
+    LOG_ERR("  %s --all\n", program_name);
+    LOG_ERR("  %s --template deepseek\n", program_name);
+    LOG_ERR("  %s --template-file my-template.jinja\n", program_name);
+}
+
+static bool parse_options(int argc, char ** argv, analysis_options & opts) {
+    if (argc < 2) {
+        print_usage(argv[0]);
+        return false;
+    }
+
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+
+        if (arg == "--all") {
+            opts.analyze_all = true;
+        } else if (arg == "--template") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template requires an argument\n");
+                return false;
+            }
+            std::string pattern = argv[++i];
+            std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
+
+            // Find matching templates
+            bool found = false;
+            for (const auto & path : ALL_TEMPLATE_PATHS) {
+                std::string path_lower = path;
+                std::transform(path_lower.begin(), path_lower.end(), path_lower.begin(), ::tolower);
+                if (path_lower.find(pattern) != std::string::npos) {
+                    opts.template_paths.push_back(path);
+                    found = true;
+                }
+            }
+
+            if (!found) {
+                LOG_ERR("No templates found matching: %s\n", pattern.c_str());
+                return false;
+            }
+        } else if (arg == "--template-file") {
+            if (i + 1 >= argc) {
+                LOG_ERR("--template-file requires an argument\n");
+                return false;
+            }
+            opts.template_paths.push_back(argv[++i]);
+        } else {
+            LOG_ERR("Unknown option: %s\n", arg.c_str());
+            print_usage(argv[0]);
+            return false;
+        }
+    }
+
+    if (opts.analyze_all) {
+        opts.template_paths = ALL_TEMPLATE_PATHS;
+    }
+
+    if (opts.template_paths.empty()) {
+        LOG_ERR("No templates specified\n");
+        print_usage(argv[0]);
+        return false;
+    }
+
+    return true;
+}
+
+static json build_tools_definition() {
+    json parameters_schema                    = json::object();
+    parameters_schema["type"]                 = "object";
+    parameters_schema["properties"]           = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        { "type",        "string"          },
+        { "description", "First parameter" }
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        { "type",        "string"           },
+        { "description", "Second parameter" }
+    });
+    parameters_schema["required"]             = json::array({ "param1", "param2" });
+
+    return json::array({
+        json{ { "type", "function" },
+             { "function", json{ { "name", "test_function_name" },
+                                  { "description", "A test function for debugging" },
+                                  { "parameters", parameters_schema } } } }
+    });
+}
+
+// Helper to create a tool call with arguments as JSON object
+static json build_tool_call(const std::string & name, const json & args_object, const std::string & id = "call_001") {
+    return json{
+        {"id", id},
+        {"type", "function"},
+        {"function", json{
+            {"name", name},
+            {"arguments", args_object}  // Pass as JSON object, not serialized string
+        }}
+    };
+}
+
+// Helper functions to create repeating message definitions
+static json make_user_msg() {
+    return json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+}
+
+static json make_user_msg2() {
+    return json{
+        {"role", "user"},
+        {"content", "Thank you."}
+    };
+}
+
+static json make_user_msg2_continue() {
+    return json{
+        {"role", "user"},
+        {"content", "Continue."}
+    };
+}
+
+static json make_assistant_no_tool() {
+    return json{
+        {"role", "assistant"},
+        {"content", "Let me help you."}
+    };
+}
+
+static json make_assistant_one_tool() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+}
+
+static json make_assistant_two_tools() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+        })}
+    };
+}
+
+static json make_assistant_no_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", "I can help you with that."}
+    };
+}
+
+static json make_assistant_with_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", "I can help you with that."},
+        {"reasoning_content", "The user is asking for help. I should respond positively."}
+    };
+}
+
+static json make_assistant_one_tool_with_reasoning() {
+    return json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })},
+        {"reasoning_content", "I need to call the tool first."}
+    };
+}
+
+static void print_diff_split(const std::string & title, const diff_split & diff) {
+    LOG_ERR("\n%s=== %s ===%s\n", ANSI_CYAN, title.c_str(), ANSI_RESET);
+    LOG_ERR("%sCommon Prefix:%s '%s'\n", ANSI_PREFIX, ANSI_RESET, diff.prefix.c_str());
+    LOG_ERR("%sCommon Suffix:%s '%s'\n", ANSI_SUFFIX, ANSI_RESET, diff.suffix.c_str());
+    LOG_ERR("%sLeft (difference):%s '%s'\n", ANSI_GREEN, ANSI_RESET, diff.left.c_str());
+    LOG_ERR("%sRight (difference):%s '%s'\n", ANSI_ORANGE, ANSI_RESET, diff.right.c_str());
+}
+
+static void check_reasoning_variables(const common_chat_template & tmpl) {
+    LOG_ERR("\n%s=== Checking Reasoning Variables ===%s\n", ANSI_CYAN, ANSI_RESET);
+
+    try {
+        // Create a list of candidate reasoning/thinking variable names to probe
+        std::vector<std::string> candidate_vars = {
+            "enable_reasoning",
+            "use_reasoning",
+            "reasoning_enabled",
+            "has_reasoning",
+            "reasoning_mode",
+            "reasoning_format",
+            "reasoning_active",
+            "with_reasoning",
+            "use_thinking",
+            "thinking_enabled",
+            "has_thinking",
+            "thinking_mode",
+            "thinking_format",
+            "thinking_active",
+            "with_thinking",
+            "enable_reason",
+            "reason_enabled",
+            "enable_think",
+            "think_enabled",
+        };
+
+        jinja::context ctx;
+        ctx.is_get_stats = true;
+
+        json messages = json::array({
+            json{
+                {"role", "user"},
+                {"content", "Test message"}
+            },
+            json{
+                {"role", "assistant"},
+                {"content", "Response"},
+                {"reasoning_content", "Some reasoning"}
+            }
+        });
+
+        // Set up base context
+        jinja::global_from_json(ctx, json{
+            {"messages", messages},
+            {"tools", json::array()},
+            {"bos_token", ""},
+            {"eos_token", ""},
+            {"add_generation_prompt", false},
+            {"enable_thinking", true}  // Already passed, so we'll exclude this from results
+        }, true);
+
+        // Add candidate variables as undefined to probe which ones are accessed
+        for (const auto & var_name : candidate_vars) {
+            ctx.set_val(var_name, jinja::mk_val<jinja::value_undefined_t>(var_name));
+        }
+
+        try {
+            jinja::runtime runtime(ctx);
+            runtime.execute(tmpl.prog);
+        } catch (const std::exception & e) {
+            // Execution may fail, that's okay - we just want to see what variables were accessed
+        }
+
+        // Check which candidate variables were accessed (stats.used = true)
+        std::vector<std::string> accessed_vars;
+        for (const auto & var_name : candidate_vars) {
+            auto val = ctx.get_val(var_name);
+            if (!val->is_undefined()) {
+                // Variable was overwritten, skip it
+                continue;
+            }
+            if (val->stats.used) {
+                accessed_vars.push_back(var_name);
+            }
+        }
+
+        if (accessed_vars.empty()) {
+            LOG_ERR("%sNo reasoning/thinking-related variables were queried by the template%s\n", ANSI_GRAY, ANSI_RESET);
+        } else {
+            LOG_ERR("Template queries the following reasoning/thinking-related variables:\n");
+            for (const auto & var : accessed_vars) {
+                LOG_ERR("  %s- %s%s\n", ANSI_ORANGE, var.c_str(), ANSI_RESET);
+            }
+        }
+
+    } catch (const std::exception & e) {
+        LOG_ERR("Error checking reasoning variables: %s\n", e.what());
+    }
+}
+
+static void analyze_template(const std::string & template_path) {
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                    ANALYZING TEMPLATE: %s\n", template_path.c_str());
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    std::string template_source;
+    try {
+        template_source = read_file(template_path);
+    } catch (const std::exception & e) {
+        LOG_ERR("Error reading template: %s\n", e.what());
+        return;
+    }
+
+    try {
+        common_chat_template chat_template(template_source, "", "");
+        json tools = build_tools_definition();
+
+        // ===== CAPABILITIES ANALYSIS =====
+        LOG_ERR("\n%s=== Template Capabilities (from jinja::caps) ===%s\n", ANSI_CYAN, ANSI_RESET);
+        auto caps = chat_template.original_caps();
+        LOG_ERR("%ssupports_tools:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tools ? "true" : "false");
+        LOG_ERR("%ssupports_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_tool_calls ? "true" : "false");
+        LOG_ERR("%ssupports_system_role:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_system_role ? "true" : "false");
+        LOG_ERR("%ssupports_parallel_tool_calls:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_parallel_tool_calls ? "true" : "false");
+        LOG_ERR("%ssupports_typed_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_typed_content ? "true" : "false");
+        LOG_ERR("%ssupports_string_content:%s %s\n", ANSI_BLUE, ANSI_RESET, caps.supports_string_content ? "true" : "false");
+
+        // ===== DIFFERENTIAL ANALYSIS =====
+
+        // Test 1: With and without tools (single user message)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_no_tools;
+            params_no_tools.messages = json::array({ user_msg });
+            params_no_tools.add_generation_prompt = false;
+            params_no_tools.tools = json::array();
+
+            autoparser::generation_params params_with_tools = params_no_tools;
+            params_with_tools.tools = tools;
+
+            std::string output_no_tools = common_chat_template_direct_apply(chat_template, params_no_tools);
+            std::string output_with_tools = common_chat_template_direct_apply(chat_template, params_with_tools);
+
+            auto diff = calculate_diff_split(output_no_tools, output_with_tools);
+            print_diff_split("Diff: With vs Without Tools (single user message)", diff);
+        }
+
+        // Test 2: With and without add_generation_prompt (single user message)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_no_prompt;
+            params_no_prompt.messages = json::array({ user_msg });
+            params_no_prompt.add_generation_prompt = false;
+            params_no_prompt.tools = json::array();
+
+            autoparser::generation_params params_with_prompt = params_no_prompt;
+            params_with_prompt.add_generation_prompt = true;
+
+            std::string output_no_prompt = common_chat_template_direct_apply(chat_template, params_no_prompt);
+            std::string output_with_prompt = common_chat_template_direct_apply(chat_template, params_with_prompt);
+
+            auto diff = calculate_diff_split(output_no_prompt, output_with_prompt);
+            print_diff_split("Diff: With vs Without add_generation_prompt (single user message)", diff);
+        }
+
+        // Test 3: Assistant with reasoning_content (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking = true;
+
+            autoparser::generation_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning() });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: With vs Without reasoning_content (user, assistant)", diff);
+        }
+
+        // Test 4: Assistant with reasoning_content (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2();
+
+            autoparser::generation_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_no_reasoning(), user_msg2 });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.enable_thinking = true;
+
+            autoparser::generation_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_with_reasoning(), user_msg2 });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: With vs Without reasoning_content (user, assistant, user)", diff);
+        }
+
+        // Test 5: Tool call in last assistant message (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_no_tool;
+            params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool() });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools = tools;
+
+            autoparser::generation_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+
+            std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+            std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+            auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+            print_diff_split("Diff: With vs Without tool call (user, assistant)", diff);
+        }
+
+        // Test 6: Tool call in last assistant message (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2_continue();
+
+            autoparser::generation_params params_no_tool;
+            params_no_tool.messages = json::array({ user_msg, make_assistant_no_tool(), user_msg2 });
+            params_no_tool.add_generation_prompt = false;
+            params_no_tool.tools = tools;
+
+            autoparser::generation_params params_with_tool = params_no_tool;
+            params_with_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+
+            std::string output_no_tool = common_chat_template_direct_apply(chat_template, params_no_tool);
+            std::string output_with_tool = common_chat_template_direct_apply(chat_template, params_with_tool);
+
+            auto diff = calculate_diff_split(output_no_tool, output_with_tool);
+            print_diff_split("Diff: With vs Without tool call (user, assistant, user)", diff);
+        }
+
+        // Test 7: One vs two tool calls (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_one_tool;
+            params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool() });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools = tools;
+
+            autoparser::generation_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools() });
+
+            std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+            std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+            auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+            print_diff_split("Diff: One vs Two tool calls (user, assistant)", diff);
+        }
+
+        // Test 8: One vs two tool calls (user, assistant, user)
+        {
+            json user_msg = make_user_msg();
+            json user_msg2 = make_user_msg2_continue();
+
+            autoparser::generation_params params_one_tool;
+            params_one_tool.messages = json::array({ user_msg, make_assistant_one_tool(), user_msg2 });
+            params_one_tool.add_generation_prompt = false;
+            params_one_tool.tools = tools;
+
+            autoparser::generation_params params_two_tools = params_one_tool;
+            params_two_tools.messages = json::array({ user_msg, make_assistant_two_tools(), user_msg2 });
+
+            std::string output_one_tool = common_chat_template_direct_apply(chat_template, params_one_tool);
+            std::string output_two_tools = common_chat_template_direct_apply(chat_template, params_two_tools);
+
+            auto diff = calculate_diff_split(output_one_tool, output_two_tools);
+            print_diff_split("Diff: One vs Two tool calls (user, assistant, user)", diff);
+        }
+
+        // Test 9: Tool call with vs without reasoning_content (user, assistant)
+        {
+            json user_msg = make_user_msg();
+
+            autoparser::generation_params params_no_reasoning;
+            params_no_reasoning.messages = json::array({ user_msg, make_assistant_one_tool() });
+            params_no_reasoning.add_generation_prompt = false;
+            params_no_reasoning.tools = tools;
+            params_no_reasoning.enable_thinking = true;
+
+            autoparser::generation_params params_with_reasoning = params_no_reasoning;
+            params_with_reasoning.messages = json::array({ user_msg, make_assistant_one_tool_with_reasoning() });
+
+            std::string output_no_reasoning = common_chat_template_direct_apply(chat_template, params_no_reasoning);
+            std::string output_with_reasoning = common_chat_template_direct_apply(chat_template, params_with_reasoning);
+
+            auto diff = calculate_diff_split(output_no_reasoning, output_with_reasoning);
+            print_diff_split("Diff: Tool call with vs without reasoning_content (user, assistant)", diff);
+        }
+
+        // Check reasoning variables
+        check_reasoning_variables(chat_template);
+
+    } catch (const std::exception & e) {
+        LOG_ERR("Analysis failed: %s\n", e.what());
+    }
+}
+
+int main(int argc, char ** argv) {
+    // Set log level to capture all output
+    common_log_set_verbosity_thold(99);
+
+    analysis_options opts;
+    if (!parse_options(argc, argv, opts)) {
+        return 1;
+    }
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_PURPLE);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      TEMPLATE ANALYSIS TOOL\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+    LOG_ERR("Analyzing %s%zu%s template(s)\n", ANSI_CYAN, opts.template_paths.size(), ANSI_RESET);
+
+    for (const auto & path : opts.template_paths) {
+        analyze_template(path);
+    }
+
+    LOG_ERR("\n");
+    LOG_ERR("%s", ANSI_GREEN);
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("                      ANALYSIS COMPLETE\n");
+    LOG_ERR("================================================================================\n");
+    LOG_ERR("%s", ANSI_RESET);
+
+    return 0;
+}
diff --git a/examples/server/public_legacy/json-schema-to-grammar.mjs b/examples/server/public_legacy/json-schema-to-grammar.mjs
index 38576c45fa..bb25887a14 100644
--- a/examples/server/public_legacy/json-schema-to-grammar.mjs
+++ b/examples/server/public_legacy/json-schema-to-grammar.mjs
@@ -729,6 +729,10 @@ export class SchemaConverter {
       return this._addRule(ruleName, out.join(''));
     } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
       return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
+    } else if (schemaType === undefined && typeof schema === 'object' && !Array.isArray(schema) && schema !== null) {
+      // No type constraint and no recognized structural keywords (e.g. {"description": "..."}).
+      // Per JSON Schema semantics this is equivalent to {} and accepts any value.
+      return this._addRule(ruleName, this._addPrimitive('value', PRIMITIVE_RULES['value']));
     } else {
       if (!(schemaType in PRIMITIVE_RULES)) {
         throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
diff --git a/examples/server/server-common.cpp b/examples/server/server-common.cpp
index 8c1e363a95..61b064606d 100644
--- a/examples/server/server-common.cpp
+++ b/examples/server/server-common.cpp
@@ -781,10 +781,9 @@ json oaicompat_chat_params_parse(
     inputs.json_schema = json_schema.is_null() ? "" : json_schema.dump();
     inputs.grammar = grammar;
     inputs.use_jinja = opt.use_jinja;
-    inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
+    inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", opt.parallel_tool_calls);
     inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
     inputs.reasoning_format = opt.reasoning_format;
-    inputs.use_peg = opt.use_peg;
     if (body.contains("reasoning_format")) {
         inputs.reasoning_format = common_reasoning_format_from_name(body.at("reasoning_format").get<std::string>());
     }
@@ -835,6 +834,7 @@ json oaicompat_chat_params_parse(
         }
         inputs.add_generation_prompt = true;
     }
+    inputs.force_pure_content = opt.force_pure_content;
 
     // Apply chat template to the list of messages
     auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);
@@ -855,16 +855,18 @@ json oaicompat_chat_params_parse(
     llama_params["prompt"] = chat_params.prompt;
     if (!chat_params.grammar.empty()) {
         llama_params["grammar"] = chat_params.grammar;
+        llama_params["grammar_type"] = std::string("tool_calls");
     }
     llama_params["grammar_lazy"] = chat_params.grammar_lazy;
     auto grammar_triggers = json::array();
-    for (const auto& trigger : chat_params.grammar_triggers) {
+    for (const auto & trigger : chat_params.grammar_triggers) {
         server_grammar_trigger ct(trigger);
         grammar_triggers.push_back(ct.to_json());
     }
     llama_params["grammar_triggers"] = grammar_triggers;
     llama_params["preserved_tokens"] = chat_params.preserved_tokens;
-    llama_params["thinking_forced_open"] = chat_params.thinking_forced_open;
+    llama_params["generation_prompt"] = chat_params.generation_prompt;
+
     for (const auto& stop : chat_params.additional_stops) {
         llama_params["stop"].push_back(stop);
     }
@@ -877,6 +879,21 @@ json oaicompat_chat_params_parse(
         throw std::runtime_error("Only one completion choice is allowed");
     }
 
+    // Reasoning budget: pass parameters through to sampling layer
+    {
+        int reasoning_budget = opt.reasoning_budget;
+        if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {
+            reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
+        }
+
+        if (!chat_params.thinking_end_tag.empty()) {
+            llama_params["reasoning_budget_tokens"] = reasoning_budget;
+            llama_params["reasoning_budget_start_tag"] = chat_params.thinking_start_tag;
+            llama_params["reasoning_budget_end_tag"] = chat_params.thinking_end_tag;
+            llama_params["reasoning_budget_message"] = opt.reasoning_budget_message;
+        }
+    }
+
     // Handle "logprobs" field
     // TODO: The response format of this option is not yet OAI-compatible, but seems like no one really using it; We may need to fix it in the future
     if (json_value(body, "logprobs", false)) {
diff --git a/examples/server/server-common.h b/examples/server/server-common.h
index 03dfbdbf77..211c2c28b5 100644
--- a/examples/server/server-common.h
+++ b/examples/server/server-common.h
@@ -245,7 +245,6 @@ json oaicompat_chat_params_parse(const json& body);
 
 struct server_chat_params {
     bool use_jinja;
-    bool use_peg;
     bool prefill_assistant;
     common_reasoning_format reasoning_format;
     std::map<std::string, std::string> chat_template_kwargs;
@@ -253,6 +252,10 @@ struct server_chat_params {
     bool allow_image;
     bool allow_audio;
     bool enable_thinking = true;
+    bool parallel_tool_calls = false;
+    int  reasoning_budget = -1;
+    std::string reasoning_budget_message;
+    bool force_pure_content = false;
 };
 
 // used by /chat/completions endpoint
diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp
index 81422dc5c4..6f9ff8bbe7 100644
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@@ -14,6 +14,7 @@
 #include <fstream>
 #include <iostream>
 #include <regex>
+#include <exception>
 
 static void log_text(const gpt_params & params_base, const std::string & text) {
     if (params_base.minilog) {
@@ -314,14 +315,13 @@ void server_context::init() {
         }
 
         // thinking is enabled if:
-        // 1. It's not explicitly disabled (reasoning_budget == 0)
-        // 2. The chat template supports it
-        const bool enable_thinking = params_base.use_jinja && params_base.reasoning_budget != 0 && common_chat_templates_support_enable_thinking(chat_templates.get());
-        SRV_INF("%s: chat template, thinking = %d\n", __func__, enable_thinking);
+            // 1. It's not explicitly disabled via --reasoning off
+            // 2. The chat template supports it
+        const bool template_supports_thinking = params_base.use_jinja && common_chat_templates_support_enable_thinking(chat_templates.get());
+        const bool enable_thinking = params_base.enable_reasoning != 0 && template_supports_thinking;
 
         chat_params = {
             /* use_jinja             */ params_base.use_jinja,
-            /* use_peg               */ params_base.use_peg,
             /* prefill_assistant     */ params_base.prefill_assistant,
             /* reasoning_format      */ params_base.reasoning_format,
             /* chat_template_kwargs  */ params_base.default_template_kwargs,
@@ -329,6 +329,10 @@ void server_context::init() {
             /* allow_image           */ mctx ? mtmd_support_vision(mctx) : false,
             /* allow_audio           */ mctx ? mtmd_support_audio(mctx) : false,
             /* enable_thinking       */ enable_thinking,
+            /* parallel_tool_calls  */ params_base.parallel_tool_calls,
+            /* reasoning_budget      */ params_base.reasoning_budget,
+            /* reasoning_budget_msg  */ params_base.reasoning_budget_message,
+            /* force_pure_content    */ params_base.force_pure_content_parser
            // /* media_path            */ params_base.media_path,
         };
     }
@@ -540,20 +544,76 @@ result_timings server_slot::get_timings() const {
     return timings;
 }
 
-const common_chat_msg& server_slot::update_chat_msg(std::vector<common_chat_msg_diff>& diffs) {
-    auto previous_msg = chat_msg;
+const common_chat_msg& server_slot::update_chat_msg(bool is_partial, std::vector<common_chat_msg_diff>& diffs,
+    bool filter_tool_calls) {
+    auto msg_prv_copy = chat_msg;
     auto new_msg = common_chat_parse(
         generated_text,
         /* is_partial= */ stop != STOP_TYPE_EOS,
-        params.oaicompat_chat_syntax);
+        params.chat_parser_params);
     if (!new_msg.empty()) {
-        new_msg.ensure_tool_call_ids_set(generated_tool_call_ids, gen_tool_call_id);
+        //new_msg.ensure_tool_call_ids_set(generated_tool_call_ids, gen_tool_call_id);
+        new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
         chat_msg = new_msg;
-        diffs = common_chat_msg_diff::compute_diffs(previous_msg, new_msg.empty() ? previous_msg : new_msg);
+        auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
+
+        if (!filter_tool_calls) {
+            diffs = std::move(all_diffs);
+        } else {
+            for (auto & d : all_diffs) {
+                // If this is a new type of delta, flush all currently pending tool call names
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (sent_tool_call_names.count(i) || chat_msg.tool_calls[i].name.empty()) {
+                        continue;
+                    }
+                    if (d.tool_call_index != i || !d.tool_call_delta.arguments.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index = i;
+                        header.tool_call_delta.id = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+
+                if (d.tool_call_index == std::string::npos) {
+                    diffs.push_back(std::move(d));
+                } else {
+                    size_t i = d.tool_call_index;
+                    if (sent_tool_call_names.count(i)) {
+                        if (!d.tool_call_delta.arguments.empty()) {
+                            d.tool_call_delta.name = "";
+                            d.tool_call_delta.id = "";
+                            diffs.push_back(std::move(d));
+                        }
+                    } else {
+                        // Not sent yet.
+                        if (!d.tool_call_delta.arguments.empty() || !is_partial) {
+                            d.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                            d.tool_call_delta.id = chat_msg.tool_calls[i].id;
+                            diffs.push_back(std::move(d));
+                            sent_tool_call_names.insert(i);
+                        } else {
+                            // Suppress
+                        }
+                    }
+                }
+            }
+            // Final check at EOF
+            if (!is_partial) {
+                for (size_t i = 0; i < chat_msg.tool_calls.size(); ++i) {
+                    if (!sent_tool_call_names.count(i) && !chat_msg.tool_calls[i].name.empty()) {
+                        common_chat_msg_diff header;
+                        header.tool_call_index = i;
+                        header.tool_call_delta.id = chat_msg.tool_calls[i].id;
+                        header.tool_call_delta.name = chat_msg.tool_calls[i].name;
+                        diffs.push_back(std::move(header));
+                        sent_tool_call_names.insert(i);
+                    }
+                }
+            }
+        }
     }
-    //LLAMA_LOG_DEBUG("Parsing chat message: %s\n", generated_text.c_str());
-    //LLAMA_LOG_DEBUG("Parsing chat message: %s\n", chat_msg.reasoning_content.c_str());
-    //LLAMA_LOG_DEBUG("Parsing chat message: %s\n", chat_msg.content.c_str());
     return chat_msg;
 }
 
@@ -1003,16 +1063,27 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
         try {
             auto schema = json_value(data, "json_schema", json::object());
             LLAMA_LOG_DEBUG("JSON schema: %s\n", schema.dump(2).c_str());
-            slot.sparams.grammar = json_schema_to_grammar(schema);
-            LLAMA_LOG_DEBUG("Converted grammar: %s\n", slot.sparams.grammar.c_str());
+            std::string grammar_str = json_schema_to_grammar(schema);
+            SRV_DBG("Converted grammar: %s\n", grammar_str.c_str());
+            slot.sparams.grammar = { COMMON_GRAMMAR_TYPE_OUTPUT_FORMAT, std::move(grammar_str) };
         }
         catch (const std::exception& e) {
             throw std::runtime_error(std::string("\"json_schema\": ") + e.what());
         }
     }
     else {
-        slot.sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
-        LLAMA_LOG_DEBUG("Grammar: %s\n", slot.sparams.grammar.c_str());
+        std::string grammar_str = json_value(data, "grammar", std::string());
+        if (!grammar_str.empty()) {
+            // grammar_type key is set by the server when converting chat template grammars
+            std::string grammar_type = json_value(data, "grammar_type", std::string());
+            if (grammar_type == "tool_calls") {
+                slot.sparams.grammar = { COMMON_GRAMMAR_TYPE_TOOL_CALLS, std::move(grammar_str) };
+            } else {
+                // explicit grammar from the user (API field "grammar")
+                slot.sparams.grammar = { COMMON_GRAMMAR_TYPE_USER, std::move(grammar_str) };
+            }
+            LLAMA_LOG_DEBUG("Grammar (%s): %s\n", grammar_type.c_str(), common_grammar_value(slot.sparams.grammar).c_str());
+        }
         slot.sparams.grammar_lazy = json_value(data, "grammar_lazy", default_sparams.grammar_lazy);
         LLAMA_LOG_DEBUG("Grammar lazy: %s\n", slot.sparams.grammar_lazy ? "true" : "false");
     }
@@ -1100,23 +1171,25 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
     {
         auto it = data.find("chat_format");
         if (it != data.end()) {
-            slot.params.oaicompat_chat_syntax.format = static_cast<common_chat_format>(it->get<int>());
-            LLAMA_LOG_DEBUG("Chat format: %s\n", common_chat_format_name(slot.params.oaicompat_chat_syntax.format));
+            slot.params.chat_parser_params.format = static_cast<common_chat_format>(it->get<int>());
+            LLAMA_LOG_DEBUG("Chat format: %s\n", common_chat_format_name(slot.params.chat_parser_params.format));
         }
         else {
-            slot.params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
+            slot.params.chat_parser_params.format = defaults.chat_parser_params.format;
         }
         common_reasoning_format reasoning_format = params_base.reasoning_format;
         if (data.contains("reasoning_format")) {
             reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
         }
-        slot.params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
-        slot.params.oaicompat_chat_syntax.reasoning_in_content = slot.params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
-        slot.params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
+        slot.params.chat_parser_params.reasoning_format = reasoning_format;
+        slot.params.chat_parser_params.reasoning_in_content = slot.params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+        slot.params.chat_parser_params.generation_prompt = json_value(data, "generation_prompt", std::string());
+        slot.sparams.generation_prompt = slot.params.chat_parser_params.generation_prompt;
+        LLAMA_LOG_DEBUG("Generation prompt: '%s'\n", slot.params.chat_parser_params.generation_prompt.c_str());
+        slot.params.chat_parser_params.parse_tool_calls = json_value(data, "parse_tool_calls", false);
         if (data.contains("chat_parser")) {
-            slot.params.oaicompat_chat_syntax.parser.load(data.at("chat_parser").get<std::string>());
+            slot.params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
         }
-        slot.params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
     }
     {
 
@@ -1181,6 +1254,29 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
         }
     }
 
+    // Parse reasoning budget sampler parameters
+    {
+        const int32_t budget = json_value(data, "reasoning_budget_tokens", (int32_t)-1);
+        const auto start_tag = json_value(data, "reasoning_budget_start_tag", std::string());
+        const auto end_tag = json_value(data, "reasoning_budget_end_tag", std::string());
+        const auto message = json_value(data, "reasoning_budget_message", std::string());
+        slot.sparams.reasoning_budget_tokens = budget;
+
+        if (!start_tag.empty()) {
+            slot.sparams.reasoning_budget_start = common_tokenize(vocab, start_tag, false, true);
+        }
+        if (!end_tag.empty()) {
+            slot.sparams.reasoning_budget_end = common_tokenize(vocab, end_tag, false, true);
+            slot.sparams.reasoning_budget_forced = common_tokenize(vocab, message + end_tag, false, true);
+
+            SRV_DBG("reasoning budget: tokens=%d, generation_prompt='%s', start=%zu toks, end=%zu toks, forced=%zu toks\n",
+                budget, slot.sparams.generation_prompt.c_str(),
+                slot.sparams.reasoning_budget_start.size(),
+                slot.sparams.reasoning_budget_end.size(),
+                slot.sparams.reasoning_budget_forced.size());
+        }
+    }
+
     {  // apply logit bias
         const auto& logit_bias = data.find("logit_bias");
         if (logit_bias != data.end() && (logit_bias->is_object() || logit_bias->is_array())) {
@@ -1475,7 +1571,7 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
         if (params_base.ctx_shift) {
             params_base.ctx_shift = false;
             LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled");
-        }
+    }
     }
     {
         const auto& stop = data.find("stop");
@@ -1505,10 +1601,18 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
     }
 
     {
-        if (slot.ctx_sampling != nullptr) {
-            common_sampler_free(slot.ctx_sampling);
+        try
+        {
+            if (slot.ctx_sampling != nullptr) {
+                common_sampler_free(slot.ctx_sampling);
+            }
+            slot.ctx_sampling = common_sampler_init(model, slot.sparams);
+        }
+        catch (std::exception & e) {
+            std::string err_msg = std::string("Failed to initialize samplers: ") + e.what();
+            send_error(task, err_msg, ERROR_TYPE_INVALID_REQUEST);
+            return false;
         }
-        slot.ctx_sampling = common_sampler_init(model, slot.sparams);
         if (slot.ctx_sampling == nullptr) {
             // for now, the only error that may happen here is invalid grammar
             send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
@@ -1826,13 +1930,12 @@ json server_context::get_formated_generation(const server_slot& slot) const {
         {"logit_bias",                slot.sparams.logit_bias},
         {"n_probs",                   slot.sparams.n_probs},
         {"min_keep",                  slot.sparams.min_keep},
-        {"grammar",                   slot.sparams.grammar},
+        {"grammar",                   slot.sparams.grammar.grammar},
         {"grammar_triggers",          grammar_triggers},
         {"preserved_tokens",          slot.sparams.preserved_tokens},
-        {"chat_format",               common_chat_format_name(slot.params.oaicompat_chat_syntax.format)},
-        {"reasoning_format",          common_reasoning_format_name(slot.params.oaicompat_chat_syntax.reasoning_format)},
-        {"reasoning_in_content",      slot.params.oaicompat_chat_syntax.reasoning_in_content},
-        {"thinking_forced_open",      slot.params.oaicompat_chat_syntax.thinking_forced_open},
+        {"chat_format",               common_chat_format_name(slot.params.chat_parser_params.format)},
+        {"reasoning_format",          common_reasoning_format_name(slot.params.chat_parser_params.reasoning_format)},
+        {"reasoning_in_content",      slot.params.chat_parser_params.reasoning_in_content},
         {"samplers",                  samplers_sequence}
     };
 }
@@ -1901,7 +2004,7 @@ void server_context::send_partial_response(server_slot& slot, completion_token_o
         {"id_slot",    slot.id},
         {"multimodal", false}
     };
-    slot.update_chat_msg(res->oaicompat_msg_diffs);
+    slot.update_chat_msg(true, res->oaicompat_msg_diffs);
 
     res->anthropic_has_reasoning = !slot.chat_msg.reasoning_content.empty();
 
@@ -1962,7 +2065,7 @@ void server_context::send_final_response(server_slot& slot) {
     res->post_sampling_probs = slot.params.post_sampling_probs;
     res->oaicompat = slot.params.oaicompat;
     res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
-    res->oaicompat_msg = slot.update_chat_msg(res->oaicompat_msg_diffs);
+    res->oaicompat_msg = slot.update_chat_msg(false, res->oaicompat_msg_diffs);
     res->oai_resp_id = slot.oai_resp_id;
     res->oai_resp_reasoning_id = slot.oai_resp_reasoning_id;
     res->oai_resp_message_id = slot.oai_resp_message_id;
@@ -3056,11 +3159,11 @@ void  server_context::create_checkpoint_at_interval(server_slot & slot, const gp
         if (slot.checkpoint_pos + params_base.ctx_checkpoints_interval <= 1 + pos) {
             bool created = create_checkpoint(slot);
             if (created) {
-                slot.checkpoint_pos = pos;
-            }
+            slot.checkpoint_pos = pos;
         }
     }
 }
+}
 
 void server_context::apply_checkpoint(server_slot & slot) {
     llama_pos pos_next = slot.cache_tokens.pos_next(slot.n_past);
@@ -3094,9 +3197,9 @@ void server_context::apply_checkpoint(server_slot & slot) {
                     do_reset = true;
                     //printf("[DEBUG] `do_reset` was set to `true` after failing to restore a checkpoint");
                 } else {
-                    slot.n_past = std::min(slot.n_past, std::max(it->pos_min+1, it->pos_max));
+                    slot.n_past = std::min(slot.n_past, std::max(it->pos_min + 1, it->pos_max));
                     slot.n_past = slot.cache_tokens.size_up_to_pos(slot.n_past-1);
-                    slot.n_past_prompt = std::min(slot.n_past_prompt, std::max(it->pos_min_prompt+1, it->pos_max_prompt));
+                    slot.n_past_prompt = std::min(slot.n_past_prompt, std::max(it->pos_min_prompt + 1, it->pos_max_prompt));
                     slot.n_past_prompt = slot.prompt_tokens.size_up_to_pos(slot.n_past_prompt-1);
                     SLT_WRN(slot, "restored context checkpoint took  %.2f ms (pos_min = %d, pos_max = %d, size = %.3f MiB)\n", (ggml_time_us() - t_start) / 1000.0, it->pos_min, it->pos_max, (float)checkpoint_size / 1024 / 1024);
                 }
diff --git a/examples/server/server-context.h b/examples/server/server-context.h
index 50e0513102..1ece6f7ca6 100644
--- a/examples/server/server-context.h
+++ b/examples/server/server-context.h
@@ -73,7 +73,7 @@ struct server_slot {
     std::vector<int32_t> i_batch_dft;
 
     std::vector<completion_token_output> generated_token_probs;
-    common_chat_msg chat_msg;
+
 
     bool infill = false;
     bool embedding = false;
@@ -129,7 +129,9 @@ struct server_slot {
     json json_schema;
 
     common_chat_format chat_format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    common_chat_msg chat_msg;
     std::vector<std::string> generated_tool_call_ids;
+    std::unordered_set<size_t> sent_tool_call_names;
 
     bool anthropic_thinking_block_started = false;
     bool anthropic_text_block_started = false;
@@ -195,7 +197,8 @@ struct server_slot {
 
     result_timings get_timings() const;
 
-    const common_chat_msg& update_chat_msg(std::vector<common_chat_msg_diff>& diffs);
+    const common_chat_msg& update_chat_msg(bool is_partial, std::vector<common_chat_msg_diff>& diffs,
+        bool filter_tool_calls = false);
 
     size_t find_stopping_strings(const std::string& text, const size_t last_token_size, bool is_full_stop);
 
diff --git a/examples/server/server-task.h b/examples/server/server-task.h
index 9529261f76..5d6a791023 100644
--- a/examples/server/server-task.h
+++ b/examples/server/server-task.h
@@ -71,7 +71,7 @@ struct slot_params {
     oaicompat_type        oaicompat = OAICOMPAT_TYPE_NONE;
     std::string           oaicompat_model;
     std::string           oaicompat_cmpl_id;
-    common_chat_syntax           oaicompat_chat_syntax;
+    common_chat_parser_params           chat_parser_params;
 
     // Embeddings
     int32_t embd_normalize = 2; // (-1=none, 0=max absolute int16, 1=taxicab, 2=Euclidean/L2, >2=p-norm)
diff --git a/examples/server/webui_llamacpp/src/lib/services/parameter-sync.spec.ts b/examples/server/webui_llamacpp/src/lib/services/parameter-sync.spec.ts
index 9ced55faa0..b40f972151 100644
--- a/examples/server/webui_llamacpp/src/lib/services/parameter-sync.spec.ts
+++ b/examples/server/webui_llamacpp/src/lib/services/parameter-sync.spec.ts
@@ -51,7 +51,7 @@ describe('ParameterSyncService', () => {
 				chat_format: '',
 				reasoning_format: '',
 				reasoning_in_content: false,
-				thinking_forced_open: false,
+				generation_prompt: '',
 				'speculative.n_max': 0,
 				'speculative.n_min': 0,
 				'speculative.p_min': 0.0,
@@ -116,7 +116,7 @@ describe('ParameterSyncService', () => {
 				chat_format: '',
 				reasoning_format: '',
 				reasoning_in_content: false,
-				thinking_forced_open: false,
+				generation_prompt: '',
 				'speculative.n_max': 0,
 				'speculative.n_min': 0,
 				'speculative.p_min': 0.0,
diff --git a/examples/server/webui_llamacpp/src/lib/types/api.d.ts b/examples/server/webui_llamacpp/src/lib/types/api.d.ts
index 1a8bc64989..eda280cbc6 100644
--- a/examples/server/webui_llamacpp/src/lib/types/api.d.ts
+++ b/examples/server/webui_llamacpp/src/lib/types/api.d.ts
@@ -119,7 +119,7 @@ export interface ApiLlamaCppServerProps {
 			chat_format: string;
 			reasoning_format: string;
 			reasoning_in_content: boolean;
-			thinking_forced_open: boolean;
+			generation_prompt: string;
 			samplers: string[];
 			'speculative.n_max': number;
 			'speculative.n_min': number;
@@ -279,7 +279,7 @@ export interface ApiSlotData {
 		chat_format: string;
 		reasoning_format: string;
 		reasoning_in_content: boolean;
-		thinking_forced_open: boolean;
+		generation_prompt: string;
 		samplers: string[];
 		'speculative.n_max': number;
 		'speculative.n_min': number;
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index bf6fb55979..663e0420c3 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -188,7 +188,7 @@ int main(int argc, char ** argv) {
     // draft sequence data
     std::vector<seq_draft> drafts(n_seq_dft);
 
-    params.sparams.grammar.clear(); // the draft samplers will copy the target sampler's grammar
+    params.sparams.grammar = { COMMON_GRAMMAR_TYPE_NONE, ""}; // the draft samplers will copy the target sampler's grammar
     if (params.sparams.temp == 0) {
         params.sparams.temp = -1.0f; // force greedy sampling with probs for the draft model
     }
diff --git a/include/llama.h b/include/llama.h
index 388caff8a5..60951b00c3 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -1253,7 +1253,7 @@ extern "C" {
     LLAMA_API struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar);
 
     /// @details Apply constraints from grammar
-    LLAMA_API void llama_grammar_sample(
+    LLAMA_API void llama_grammar_apply(
             const struct llama_grammar * grammar,
             const struct llama_context * ctx,
                 llama_token_data_array * candidates);
@@ -1261,7 +1261,7 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates,
       const struct llama_grammar * grammar),
-        "use llama_grammar_sample instead");
+        "use llama_grammar_apply instead");
 
     /// @details Accepts the sampled token into the grammar
     LLAMA_API void llama_grammar_accept_token(
diff --git a/models/templates/Apertus-8B-Instruct.jinja b/models/templates/Apertus-8B-Instruct.jinja
new file mode 100644
index 0000000000..432ae59a40
--- /dev/null
+++ b/models/templates/Apertus-8B-Instruct.jinja
@@ -0,0 +1,327 @@
+{%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
+    {%- if param_spec.type == "array" -%}
+        {%- if param_spec['items'] -%}
+            {%- if param_spec['items']['type'] == "string" -%}
+                {{- "string[]" }}
+            {%- elif param_spec['items']['type'] == "number" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "integer" -%}
+                {{- "number[]" }}
+            {%- elif param_spec['items']['type'] == "boolean" -%}
+                {{- "boolean[]" }}
+            {%- else -%}
+                {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
+                {%- if inner_type == "object | object" or inner_type|length > 50 -%}
+                    {{- "any[]" }}
+                {%- else -%}
+                    {{- inner_type + "[]" }}
+                {%- endif -%}
+            {%- endif -%}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- else -%}
+            {{- "any[]" }}
+            {%- if param_spec.nullable -%}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
+        {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
+        {%- if param_spec.type | length > 1 -%}
+            {{- param_spec.type | join(" | ") }}
+        {%- else -%}
+            {{- param_spec.type[0] }}
+        {%- endif -%}
+    {%- elif param_spec.oneOf -%}
+        {#- Handle oneOf schemas - check for complex unions and fallback to any #}
+        {%- set has_object_variants = false -%}
+        {%- for variant in param_spec.oneOf -%}
+            {%- if variant.type == "object" -%}
+                {%- set has_object_variants = true -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
+            {{- "any" }}
+        {%- else -%}
+            {%- for variant in param_spec.oneOf -%}
+                {{- render_typescript_type(variant, required_params) -}}
+                {%- if variant.description %}
+                    {{- "// " + variant.description }}
+                {%- endif -%}
+                {%- if variant.default is defined %}
+                    {{ "// default: " + variant.default|tojson }}
+                {%- endif -%}
+                {%- if not loop.last %}
+                    {{- " | " }}
+                {% endif -%}
+            {%- endfor -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "string" -%}
+        {%- if param_spec.enum -%}
+            {{- '"' + param_spec.enum|join('" | "') + '"' -}}
+        {%- else -%}
+            {{- "string" }}
+            {%- if param_spec.nullable %}
+                {{- " | null" }}
+            {%- endif -%}
+        {%- endif -%}
+    {%- elif param_spec.type == "number" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "integer" -%}
+        {{- "number" }}
+    {%- elif param_spec.type == "boolean" -%}
+        {{- "boolean" }}
+    {%- elif param_spec.type == "object" -%}
+        {%- if param_spec.properties -%}
+            {{- "{\n" }}
+            {%- for prop_name, prop_spec in param_spec.properties.items() -%}
+                {{- prop_name -}}
+                {%- if prop_name not in (param_spec.required or []) -%}
+                    {{- "?" }}
+                {%- endif -%}
+                {{- ": " }}
+                {{ render_typescript_type(prop_spec, param_spec.required or []) }}
+                {%- if not loop.last -%}
+                    {{-", " }}
+                {%- endif -%}
+            {%- endfor -%}
+            {{- "}" }}
+        {%- else -%}
+            {{- "object" }}
+        {%- endif -%}
+    {%- else -%}
+        {{- "any" }}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro render_tools(tools) -%}
+    {%- for tool in tools %}
+        {{- "// " + tool.function.description + "\n" }}
+        {{- "type "+ tool.function.name + " = " }}
+        {%- if tool.function.parameters and tool.function.parameters.properties %}
+            {{- "(_: {\n" }}
+            {%- for param_name, param_spec in tool.function.parameters.properties.items() %}
+                {%- if param_spec.description %}
+                    {{- "// " + param_spec.description + "\n" }}
+                {%- endif %}
+                {{- param_name }}
+                {%- if param_name not in (tool.function.parameters.required or []) -%}
+                    {{- "?" }}
+                {%- endif -%}
+                {{- ": " }}
+                {{- render_typescript_type(param_spec, tool.function.parameters.required or []) }}
+                {%- if param_spec.default is defined -%}
+                    {%- if param_spec.enum %}
+                        {{- ", // default: " + param_spec.default }}
+                    {%- elif param_spec.oneOf %}
+                        {{- "// default: " + param_spec.default }}
+                    {%- else %}
+                        {{- ", // default: " + param_spec.default|tojson }}
+                    {%- endif -%}
+                {%- endif -%}
+                {%- if not loop.last %}
+                    {{- ",\n" }}
+                {%- else %}
+                    {{- "\n" }}
+                {%- endif -%}
+            {%- endfor %}
+            {{- "}) => any;" }}
+        {%- else -%}
+            {{- "() => any;" }}
+        {%- endif -%}
+        {%- if not loop.last -%}
+            {{- "\n" }}
+        {%- endif -%}
+    {%- endfor %}
+{%- endmacro -%}
+
+{{ bos_token }}
+
+{%- set system_token = '<|system_start|>' -%}
+{%- set end_system_token = '<|system_end|>' -%}
+{%- set developer_token = '<|developer_start|>' -%}
+{%- set end_developer_token = '<|developer_end|>' -%}
+{%- set user_token = '<|user_start|>' -%}
+{%- set end_user_token = '<|user_end|>' -%}
+{%- set assistant_token = '<|assistant_start|>' -%}
+{%- set end_assistant_token = '<|assistant_end|>' -%}
+{%- set inner_token = '<|inner_prefix|>' -%}
+{%- set outer_token = '<|inner_suffix|>' -%}
+{%- set tool_calls_token = '<|tools_prefix|>' -%}
+{%- set end_tool_calls_token = '<|tools_suffix|>' -%}
+
+{%- set ns = namespace(in_assistant=false, in_tool=false, in_inner=false, assistant_format=none) -%}
+
+{%- if messages and messages[0].role == 'system' -%}
+    {%- if "content" in messages[0] -%}
+        {%- if messages[0].content is string -%}
+            {{ system_token + messages[0].content + end_system_token }}
+        {%- elif messages[0].content is mapping and "text" in messages[0].content -%}
+            {{ system_token + messages[0].content.text + end_system_token }}
+        {%- else -%}
+            {{- raise_exception("Invalid system message") -}}
+        {%- endif -%}
+    {%- else -%}
+        {{- raise_exception("Invalid system message") -}}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {{ system_token + 'You are Apertus, a helpful assistant created by the SwissAI initiative.\nKnowledge cutoff: 2024-04\nCurrent date: ' + strftime_now('%Y-%m-%d') + end_system_token }}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+
+{{ developer_token + 'Deliberation: ' }}
+{%- if enable_thinking is defined and enable_thinking -%}
+    {{ 'enabled\n' }}
+{%- else -%}
+    {{ 'disabled\n' }}
+{%- endif -%}
+{%- if tools is defined and tools -%}
+    {{ 'Tool Capabilities:\n' + render_tools(tools) }}
+{%- else -%}
+    {{ 'Tool Capabilities: disabled' }}
+{%- endif -%}
+{{ end_developer_token }}
+
+{%- for message in loop_messages -%}
+    {%- if message.role == 'user' -%}
+        {%- set ns.in_inner = false -%}
+        {%- if ns.in_tool -%}
+            {{ ']' }}
+            {%- set ns.in_tool = false -%}
+        {%- endif -%}
+        {%- if ns.in_assistant -%}
+            {{ end_assistant_token }}
+            {%- set ns.in_assistant = false -%}
+        {%- endif -%}
+        {%- if "content" in message -%}
+            {{ user_token }}
+            {%- if message.content is string -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "parts" in message.content -%}
+                {%- set parts = message.content.parts -%}
+                {%- for part in parts -%}
+                    {%- if part.type == "text" -%}
+                        {{ part.text }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid user part: " + part.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid user message: " + message.role) -}}
+            {%- endif -%}
+            {{ end_user_token }}
+        {%- endif -%}
+    {%- elif message.role == 'assistant' -%}
+        {%- if not ns.in_assistant -%}
+            {{ assistant_token }}
+            {%- set ns.in_assistant = true -%}
+        {%- endif -%}
+        {%- if "content" in message and message.content is not none -%}
+            {%- if message.content is string and (ns.assistant_format is none or ns.assistant_format == "string") -%}
+                {%- if ns.in_tool -%}
+                    {{ ']' }}
+                    {%- set ns.in_tool = false -%}
+                {%- endif -%}
+                {%- set ns.assistant_format = "string" -%}
+                {{ message.content }}
+            {%- elif message.content is mapping and "blocks" in message.content and (ns.assistant_format is none or ns.assistant_format == "mapping") -%}
+                {%- set ns.assistant_format = "mapping" -%}
+                {%- set blocks = message.content.blocks -%}
+                {%- for block in blocks -%}
+                    {%- if block.type == 'thoughts' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if not ns.in_inner -%}
+                            {%- set ns.in_inner = true -%}
+                            {{ inner_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- elif block.type == 'tool_calls' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if ns.in_inner and not loop.first and block.calls|length == 1 and block.calls[0].name == 'display_answers' -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ tool_calls_token + '[' }}
+                        {%- for tool_call in block.calls -%}
+                            {{- '{"' + tool_call.name + '": ' + tool_call.arguments + '}' }}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{ ']' + end_tool_calls_token }}
+                    {%- elif block.type == 'tool_outputs' -%}
+                        {%- if ns.in_tool -%}
+                            {{- raise_exception("Cannot have both tool outputs as separate messages and tool outputs as blocks") -}}
+                        {%- endif -%}
+                        {{ '[' }}
+                        {%- for tool_output in block.outputs -%}
+                            {{- tool_output.output }}
+                            {%- if not loop.last -%}
+                                {{- ", " }}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {{- ']' }}
+                    {%- elif block.type == 'response' -%}
+                        {%- if ns.in_tool -%}
+                            {{ ']' }}
+                            {%- set ns.in_tool = false -%}
+                        {%- endif -%}
+                        {%- if (not loop.first and ns.in_inner) or (ns.in_assistant and ns.in_inner) -%}
+                            {%- set ns.in_inner = false -%}
+                            {{ outer_token }}
+                        {%- endif -%}
+                        {{ block.text }}
+                    {%- else -%}
+                        {{- raise_exception("Invalid assistant block type: " + block.type) -}}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- else -%}
+                {{- raise_exception("Invalid assistant content '" + message.content + "', expected " + ns.assistant_format) -}}
+            {%- endif -%}
+        {%- elif "tool_calls" not in message -%}
+            {{- raise_exception("Invalid assistant message " + message) -}}
+        {%- endif -%}
+        {%- if "tool_calls" in message and message.tool_calls -%}
+            {{ tool_calls_token + '[' }}
+            {%- for tool_call in message.tool_calls -%}
+                {%- if tool_call.type == 'function' -%}
+                    {%- set function = tool_call.function -%}
+                    {{- '{"' + function.name + '": ' + function.arguments|tojson + '}' }}
+                    {%- if not loop.last -%}
+                        {{- ", " }}
+                    {%- endif -%}
+                {%- else -%}
+                    {{- raise_exception("Invalid tool call type: " + tool_call.type) -}}
+                {%- endif -%}
+            {%- endfor -%}
+            {{ ']' + end_tool_calls_token }}
+        {%- endif -%}
+    {%- elif message.role == 'tool' -%}
+        {%- if not ns.in_assistant -%}
+            {{- raise_exception("Tool message outside of assistant") -}}
+        {%- endif -%}
+        {%- if not ns.in_tool -%}
+            {{ '[' }}
+            {%- set ns.in_tool = true -%}
+        {%- else -%}
+            {{ ", "}}
+        {%- endif -%}
+        {{ message.content }}
+    {%- else -%}
+        {{- raise_exception("Invalid message role") -}}
+    {%- endif -%}
+{%- endfor -%}
+{%- if ns.in_tool -%}
+    {{ ']' }}
+{%- endif -%}
+{%- if add_generation_prompt -%}
+    {{ assistant_token }}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
new file mode 100755
index 0000000000..8a282b8231
--- /dev/null
+++ b/models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
@@ -0,0 +1,171 @@
+{# ---------------------------------------------------------------------- #}
+{# ƛƬ Default setup and flags                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- set messages = messages or [] -%}
+{%- set tools = tools or [] -%}
+{%- set add_generation_prompt = add_generation_prompt or false -%}
+{%- set available_tool_string = '' -%}
+{%- set add_tool_id = true -%}
+{%- set add_thoughts = true -%}            {# whether to include <thinking> reasoning blocks #}
+{# Optional token placeholders (safe defaults) #}
+{%- set bos_token = bos_token or '' -%}
+{%- set eos_token = eos_token or '' -%}
+{# ---------------------------------------------------------------------- #}
+{# Core reasoning prompt and assistant reasoning prefix                 #}
+{# ---------------------------------------------------------------------- #}
+{%- set reasoning_prompt -%}
+    You are a thoughtful, systematic AI assistant from ServiceNow Language Models (SLAM) lab.
+    Analyze each question carefully, present your reasoning step-by-step, then provide the final
+    response after the marker [BEGIN FINAL RESPONSE].
+{%- endset -%}
+{%- set reasoning_asst_turn_start = 'Here are my reasoning steps:\n' -%}
+{# ---------------------------------------------------------------------- #}
+{# Tool list and tool call output format                                  #}
+{# ---------------------------------------------------------------------- #}
+{%- if tools|length > 0 -%}
+    {%- set available_tool_string -%}
+        You are provided with function signatures within <available_tools></available_tools> XML tags.
+        You may call one or more functions to assist with the user query.
+        Don't make assumptions about the arguments. You should infer the argument values from previous
+        user responses and the system message.
+        Here are the available tools: 
+        <available_tools>
+        {% for tool in tools %}{{ tool|string }}{% endfor %}
+        
+        </available_tools>.
+
+        Return all function calls as a list of JSON objects within <tool_calls></tool_calls> XML tags.
+        Each JSON object should contain a function name and arguments as follows:
+        <tool_calls>[
+            {"name": <function-name-1>, "arguments": <args-dict-1>},
+            {"name": <function-name-2>, "arguments": <args-dict-2>},
+            ...
+        ]</tool_calls>
+    {%- endset -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Start system block if first message is not system                      #}
+{# ---------------------------------------------------------------------- #}
+{%- if messages|length > 0 and messages[0]['role'] != 'system' -%}
+    {%- if tools|length > 0 -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + available_tool_string + '\n' }}
+    {%- else -%}
+        {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' }}
+    {%- endif -%}
+{%- endif -%}
+{# ---------------------------------------------------------------------- #}
+{# Iterate through messages                                             #}
+{# ---------------------------------------------------------------------- #}
+{%- for message in messages -%}
+
+    {# ---------------- USER MESSAGE ---------------- #}
+    {%- if message['role'] == 'user' -%}
+        {{ '<|begin_user|>\n' }}
+        {%- if message['content'] is not string -%}
+            {%- for chunk in message['content'] -%}
+                {%- if chunk['type'] == 'text' -%}
+                    {{ chunk['text'] }}
+                {%- elif chunk['type'] in ['image', 'image_url'] -%}
+                    {{ '[IMG]' }}
+                {%- else -%}
+                    {{ raise_exception('Unrecognized content type!') }}
+                {%- endif -%}
+            {%- endfor -%}
+        {%- else -%}
+            {{ message['content'] }}
+        {%- endif -%}
+
+    {# ---------------- SYSTEM MESSAGE ---------------- #}
+    {%- elif message['role'] == 'system' -%}
+        {%- set sys_content = message.get('content', '') -%}
+        {%- if sys_content and sys_content|length > 0 -%}
+            {%- if sys_content is string -%}
+                {%- set system_message = sys_content -%}
+            {%- else -%}
+                {%- set system_message = sys_content[0]['text'] -%}
+            {%- endif -%}
+        {%- else -%}
+            {%- set system_message = '' -%}
+        {%- endif -%}
+
+        {%- if tools|length > 0 -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' + available_tool_string + '\n' }}
+        {%- else -%}
+            {{ bos_token + '<|begin_system|>\n' + reasoning_prompt + '\n' + system_message + '\n' }}
+        {%- endif -%}
+
+    {# ---------------- ASSISTANT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'assistant' -%}
+        {%- if loop.last -%}
+            {%- set add_tool_id = false -%}
+        {%- endif -%}
+
+        {{ '\n<|begin_assistant|>\n' }}
+
+        {%- if add_thoughts and message.get('reasoning_content') and loop.last -%}
+            {{ message['reasoning_content'] + '\n[BEGIN FINAL RESPONSE]\n' }}
+        {%- endif -%}
+
+        {%- set asst_content = message.get('content', '') -%}
+        {%- if asst_content and asst_content|length > 0 -%}
+            {%- if asst_content is not string -%}
+                {%- set asst_text = asst_content[0]['text'] -%}
+            {%- else -%}
+                {%- set asst_text = asst_content -%}
+            {%- endif -%}
+            {# For historical turns (not the last), strip reasoning and keep only final response #}
+            {%- if not loop.last and '[BEGIN FINAL RESPONSE]' in asst_text -%}
+                {{- asst_text.split('[BEGIN FINAL RESPONSE]')[-1] | trim -}}
+            {%- else -%}
+                {{- asst_text -}}
+            {%- endif -%}
+        {%- elif message.get('chosen') and message['chosen']|length > 0 -%}
+            {{ message['chosen'][0] }}
+        {%- endif -%}
+
+        {# Tool call output #}
+        {%- set tool_calls = message.get('tool_calls', []) -%}
+        {%- if tool_calls and tool_calls|length > 0 -%}
+            {{ '\n<tool_calls>[' }}
+            {%- for tool_call in tool_calls -%}
+                {{ '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson }}
+                {%- if add_tool_id == true and 'id' in tool_call -%}
+                    {{ ', "id": "' + tool_call['id'] + '"' }}
+                {%- endif -%}
+                {{ '}' }}
+                {%- if not loop.last -%}{{ ', ' }}{%- endif -%}
+            {%- endfor -%}
+            {{ ']</tool_calls>' }}
+        {%- endif -%}
+
+        {%- set training_prompt = training_prompt if (training_prompt is defined) else false -%}
+        {%- if not loop.last or training_prompt -%}
+            {{ '\n<|end|>\n' }}
+        {%- endif -%}
+
+    {# ---------------- TOOL RESULT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'tool' -%}
+        {%- set tool_content = message.get('content', '') -%}
+        {%- if tool_content is string -%}
+            {%- set tool_message = tool_content -%}
+        {%- else -%}
+            {%- set tool_message = tool_content[0]['text'] if tool_content else '' -%}
+        {%- endif -%}
+        {{ '<|begin_tool_result|>\n' + tool_message|string + '\n' }}
+
+    {# ---------------- CONTENT MESSAGE ---------------- #}
+    {%- elif message['role'] == 'content' -%}
+        {%- set msg_content = message.get('content', '') -%}
+        {%- if msg_content is not string -%}
+            {{ '<|begin_content|>\n' + msg_content[0]['text'] + '\n' }}
+        {%- else -%}
+            {{ '<|begin_content|>\n' + msg_content + '\n' }}
+        {%- endif -%}
+    {%- endif -%}
+
+    {# ---------------- REASONING PROMPT BEFORE NEXT ASSISTANT ---------------- #}
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{ '\n<|begin_assistant|>\n' + reasoning_asst_turn_start }}
+    {%- endif -%} 
+
+{%- endfor -%}
diff --git a/models/templates/Bielik-11B-v3.0-Instruct.jinja b/models/templates/Bielik-11B-v3.0-Instruct.jinja
new file mode 100644
index 0000000000..40ef50076e
--- /dev/null
+++ b/models/templates/Bielik-11B-v3.0-Instruct.jinja
@@ -0,0 +1,77 @@
+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] %}
+    {%- set loop_start_index = 1 %}
+{%- else %}
+    {%- set system_message = "" %}
+    {%- set loop_start_index = 0 %}
+{%- endif %}
+
+{%- if system_message or tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if system_message %}
+        {{- system_message }}
+    {%- endif %}
+    {%- if tools %}
+        {{- '\n\nMasz dostęp do następujących narzędzi. Definicje narzędzi znajdują się poniżej wewnątrz znaczników <|function_list|>:\n<|function_list|>\n' }}
+        {{- '[' }}
+        {%- for tool in tools %}
+            {{- tool | tojson }}
+            {%- if not loop.last %}
+                {{- ',\n' }}
+            {%- endif %}
+        {%- endfor %}
+        {{- ']\n<|function_list|>\n\nAby wywołać narzędzie, użyj formatu <tool_call>{"name": "nazwa_narzędzia", "arguments": {"argument": "wartość"}}</tool_call>. Wyniki działania narzędzi zostaną przekazane z markerem <|function_output|>.\n' }}
+    {%- endif %}
+    {%- if enable_thinking %}
+        {{- '\n\nZanim odpowiesz na pytanie, najpierw przemyśl swoje kroki i umieść swoje myśli wewnątrz tagów <think>...</think>. Musisz najpierw pomyśleć, zanim udzielisz odpowiedzi. ' -}}
+        {{- 'WAŻNE: Powinieneś myśleć w tym samym języku, co pytanie użytkownika. Jeśli pytanie jest zadane po polsku, powinieneś również myśleć po polsku. Jeśli pytanie jest po angielsku, myślisz również po angielsku itd. ' }}
+        {{- '** PAMIĘTAJ! ** Pytanie po polsku -> myślenie po polsku -> odpowiedź po polsku!' -}}
+        {{- '*** BARDZO WAŻNE!!! *** Jesteś Bielikiem, polskim modelem językowym. Twoją główną cechą jest umiejętność pisania po polsku. Jeśli użytkownik zadaje Ci pytania po polsku, ZAWSZE odpowiadaj po polsku. ' -}}
+        {{- 'Nawet, jeśli korzystasz z narzędzia, którego większość instrukcji jest po angielsku, powinieneś przede wszystkim odpowiadać po polsku, jeśli użytkownik zadaje pytanie w tym języku. ' -}}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- endif %}
+
+{%- for message in messages[loop_start_index:] %}
+    {%- if message['role'] == 'user' %}
+        {{- '<|im_start|>user\n' + message['content'] + '<|im_end|>\n' }}
+    {%- elif message['role'] == 'assistant' %}
+        {{- '<|im_start|>assistant\n' }}
+        {%- set content = message.content | default('') %}
+        {%- set reasoning_content = message.reasoning_content | default('') %}
+        {%- if not reasoning_content and '<think>' in content and '</think>' in content %}
+            {%- set reasoning_parts = content.split('</think>') %}
+            {%- set reasoning_content = reasoning_parts[0].split('<think>')[-1] %}
+            {%- set content = reasoning_parts[1:] | join('</think>') %}
+        {%- endif %}
+        {%- if reasoning_content %}
+            {{- '<think>\n' + reasoning_content.strip() + '\n</think>\n' }}
+        {%- endif %}
+        {{- content.lstrip() }}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '\n<tool_call>\n{"name": "' + tool_call.name + '", "arguments": ' + (tool_call.arguments if tool_call.arguments is string else tool_call.arguments | tojson) + '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message['role'] == 'tool' %}
+        {%- if loop.index0 == 0 or messages[loop.index0 - 1]['role'] != 'tool' %}
+            {{- '<|im_start|>user\n' }}
+        {%- endif %}
+        {{- '<|function_output|>' + message['content'] }}
+        {%- if loop.last or messages[loop.index0 + 1]['role'] != 'tool' %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
index 078e9f5458..fcf1259d33 100644
--- a/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
+++ b/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
@@ -132,7 +132,7 @@ The following instructions take precedence over instructions in the default prea
     {%- elif message.role|lower == 'user' %}
 <|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{ message.content }}<|END_OF_TURN_TOKEN|>{%- if documents and not sent_documents.value %}{%- set sent_documents.value = true %}{% set tool_idx.value = tool_idx.value + 1 %}{{ document_turn(documents) }}{% endif %}
     {%- elif message.role|lower == 'assistant' or message.role|lower == 'chatbot' %}
-<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.tool_plan}}<|END_THINKING|><|START_ACTION|>[
+<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{% if message.tool_calls %}<|START_THINKING|>{{message.reasoning_content}}<|END_THINKING|><|START_ACTION|>[
     {% for tc in message.tool_calls %}
     {"tool_call_id": "{{ tool_idx.value }}", "tool_name": "{{ tc['function']['name'] }}", "parameters": {{ tc['function']['arguments']|tojson }}}{% if not loop.last %},{% endif %}
 
@@ -153,4 +153,4 @@ The following instructions take precedence over instructions in the default prea
 
 ]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|>
     {%- endif %}
-{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
\ No newline at end of file
+{%- endfor %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{%- if not enable_thinking -%}<|START_THINKING|><|END_THINKING|>{%- endif %}
\ No newline at end of file
diff --git a/models/templates/GLM-4.7-Flash.jinja b/models/templates/GLM-4.7-Flash.jinja
new file mode 100644
index 0000000000..2ab98ef068
--- /dev/null
+++ b/models/templates/GLM-4.7-Flash.jinja
@@ -0,0 +1,86 @@
+[gMASK]<sop>
+{%- if tools -%}
+<|system|>
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{% for tool in tools %}
+{{ tool | tojson(ensure_ascii=False) }}
+{% endfor %}
+</tools>
+
+For each function call, output the function name and arguments within the following XML format:
+<tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
+{%- macro visible_text(content) -%}
+    {%- if content is string -%}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping -%}
+        {%- for item in content -%}
+            {%- if item is mapping and item.type == 'text' -%}
+                {{- item.text }}
+            {%- elif item is string -%}
+                {{- item }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{- content }}
+    {%- endif -%}
+{%- endmacro -%}
+{%- set ns = namespace(last_user_index=-1) %}
+{%- for m in messages %}
+    {%- if m.role == 'user' %}
+        {% set ns.last_user_index = loop.index0 -%}
+    {%- endif %}
+{%- endfor %}
+{% for m in messages %}
+{%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
+{%- elif m.role == 'assistant' -%}
+<|assistant|>
+{%- set reasoning_content = '' %}
+{%- set content = visible_text(m.content) %}
+{%- if m.reasoning_content is string %}
+    {%- set reasoning_content = m.reasoning_content %}
+{%- else %}
+    {%- if '</think>' in content %}
+        {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+        {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+    {%- endif %}
+{%- endif %}
+{%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
+{{ '<think>' + reasoning_content.strip() +  '</think>'}}
+{%- else -%}
+{{ '</think>' }}
+{%- endif -%}
+{%- if content.strip() -%}
+{{ content.strip() }}
+{%- endif -%}
+{% if m.tool_calls %}
+{% for tc in m.tool_calls %}
+{%- if tc.function %}
+    {%- set tc = tc.function %}
+{%- endif %}
+{{- '<tool_call>' + tc.name -}}
+{% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
+{% endif %}
+{%- elif m.role == 'tool' -%}
+{%- if m.content is string -%}
+{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+    {{- '<|observation|>' }}
+{%- endif %}
+{{- '<tool_response>' }}
+{{- m.content }}
+{{- '</tool_response>' }}
+{%- else -%}
+<|observation|>{% for tr in m.content %}
+<tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
+{% endif -%}
+{%- elif m.role == 'system' -%}
+<|system|>{{ visible_text(m.content) }}
+{%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/GigaChat3-10B-A1.8B.jinja b/models/templates/GigaChat3-10B-A1.8B.jinja
new file mode 100644
index 0000000000..f9826a4457
--- /dev/null
+++ b/models/templates/GigaChat3-10B-A1.8B.jinja
@@ -0,0 +1,355 @@
+{#--------TOOL RENDERING FUNCTIONS---------#}
+
+{#---------------------------------------------------------------
+  Converts JSON Schema (dict) to a TypeScript type definition
+----------------------------------------------------------------#}
+{%- macro json_schema_to_typescript(schema, indent="") -%}
+	{%- set ADDITIONAL_JSON_KEYS = ['format', 'maxItems', 'maximum', 'minItems', 'minimum', 'pattern'] -%}
+	{%- set ty = schema.get("type") -%}
+
+	{# ---------------- OBJECT ---------------- #}
+	{%- if ty == "object" -%}
+		{{- "{\n" -}}
+
+		{# Start building property list #}
+		{%- set props = schema.get("properties", {}) -%}
+		{%- set required = schema.get("required", []) -%}
+		{%- set has_additional_props = schema.get("additionalProperties") is defined -%}
+		{%- set additional_props_type = none -%}
+		{%- if has_additional_props -%}
+			{%- if schema.additionalProperties == true -%}
+				{%- set additional_props_type = {'type': 'any'} -%}
+			{%- elif schema.additionalProperties is mapping -%}
+				{%- set additional_props_type = schema.additionalProperties -%}
+			{%- endif -%}
+		{%- endif -%}
+
+		{%- for key, val in props.items() -%}
+			{# ---------- Description Comments ---------- #}
+			{%- if "description" in val -%}
+				{%- for line in val['description'].split('\n') -%}
+					{%- if line.strip() -%}
+						{{- indent + '// ' + line + '\n' -}}
+					{%- endif -%}
+				{%- endfor -%}
+			{%- endif -%}
+
+			{# ---------- Additional JSON Keys ---------- #}
+			{%- for add_key, add_val in val.items() -%}
+				{%- if add_key in ADDITIONAL_JSON_KEYS -%}
+					{%- if add_val is string -%}
+						{{- indent + '// ' + add_key + ': "' + add_val + '"' + '\n'  -}}
+					{%- else -%}
+						{{- indent + '// ' + add_key + ': ' ~ add_val ~ '\n' -}}
+					{%- endif -%}
+				{%- endif -%}
+			{%- endfor -%}
+
+			{# ---------- Property Definition ---------- #}
+            {%- set type_str = json_schema_to_typescript(
+                val, 
+                indent + "  "
+            ) -%}
+
+			{{- indent + key + ('' if key in required else '?') + ': ' + type_str + ',' -}}
+
+			{%- if "default" in val or "defalut_value" in val -%}
+				{%- set default = val.get("default", val.get("defalut_value")) -%}
+				{%- if default is string -%}
+					{{- ' // default: "' + default + '"' -}}
+				{%- else -%}
+					{{- ' // default: ' ~ default -}}
+				{%- endif -%}
+			{%- endif -%}
+
+			{{- "\n" -}}
+		{%- endfor -%}
+
+		{# Handle additionalProperties as index signature #}
+		{%- if has_additional_props and additional_props_type is not none -%}
+			{%- set additional_type_str = json_schema_to_typescript(
+				additional_props_type,
+				indent + "  "
+			) -%}
+			{{- indent + '[key: string]: ' + additional_type_str + '\n' -}}
+		{%- endif -%}
+
+		{{- indent[: (indent|length - "  "|length) ] + '}' -}}
+
+	{# ---------------- STRING ---------------- #}
+	{%- elif ty == "string" -%}
+		{%- if schema.get("enum") -%}
+            {%- set ns = namespace(enum = []) -%}
+            {%- for en in schema['enum'] -%}
+                {%- set ns.enum = ns.enum + ['"' ~ en ~ '"'] -%}
+            {%- endfor -%}
+			{{- ns.enum | join(' | ') -}}
+		{%- elif schema.get("format", "none") in ['date-time', 'date'] -%}
+			{{- 'Date' -}}
+		{%- else -%}
+			{{- 'string' -}}
+		{%- endif -%}
+
+	{# ---------------- NUMBER / INTEGER ---------------- #}
+	{%- elif ty in ["number", "integer"] -%}
+		{%- if schema.get("enum") -%}
+			{{- schema.enum | join(' | ') -}}
+		{%- else -%}
+			{{- 'number' -}}
+		{%- endif -%}
+
+	{# ---------------- BOOLEAN ---------------- #}
+	{%- elif ty == "boolean" -%}
+		{{- 'boolean' -}}
+
+	{# ---------------- ARRAY ---------------- #}
+	{%- elif ty == "array" -%}
+		{%- if "items" in schema -%}
+			{{- json_schema_to_typescript(schema['items'], indent) + '[]' -}}
+		{%- else -%}
+			{{- 'Array<any>' -}}
+		{%- endif -%}
+
+	{# ---------------- FALLBACK ---------------- #}
+	{%- else -%}
+		{{- 'any' -}}
+	{%- endif -%}
+{%- endmacro -%}
+
+{#---------------------------------------------------------------
+  Renders a namespace and its tool definitions in TypeScript style
+----------------------------------------------------------------#}
+
+{%- macro render_tool_namespace(namespace_name, tools) -%}
+	{%- set ns = namespace(sections = ['namespace ' ~ namespace_name ~ ' {']) -%}
+
+	{%- for tool in tools -%}
+		{%- if tool.function -%}
+			{%- set tool = tool.function -%}
+		{%- endif -%}
+
+		{%- set ns_tool = namespace(content_lines=[]) -%}
+
+		{# ---------- TOOL DESCRIPTION ---------- #}
+		{%- if tool.get('description') -%}
+			{%- for line in tool['description'].split('\n') -%}
+				{%- if line.strip() -%}
+					{%- set ns_tool.content_lines = ns_tool.content_lines + ['// ' ~ line] -%}
+				{%- endif -%}
+			{%- endfor -%}
+		{%- endif -%}
+
+		{# ---------- TOOL SIGNATURE ---------- #}
+		{%- set main_body = "" -%}
+		{%- set params = tool.get("parameters") -%}
+		{%- if params and params.get("properties") -%}
+			{%- set param_type = json_schema_to_typescript(params, "  ") -%}
+			{%- set main_body = 'type ' ~ tool.name ~ ' = (_: ' ~ param_type ~ ') => ' -%}
+		{%- else -%}
+			{%- set main_body = 'type ' ~ tool.name ~ ' = () => ' -%}
+		{%- endif -%}
+
+		{# ---------- RETURN TYPE ---------- #}
+		{%- set return_params = tool.get("return_parameters") -%}
+		{%- if return_params and return_params.get("properties") -%}
+			{%- set return_type = json_schema_to_typescript(return_params, "  ") -%}
+			{%- set main_body = main_body ~ return_type -%}
+		{%- else -%}
+			{%- set main_body = main_body ~ 'any' -%}
+		{%- endif -%}
+
+		{%- set main_body = main_body ~ ';\n' -%}
+
+		{%- set ns_tool.content_lines = ns_tool.content_lines + [main_body] -%}
+
+		{# ---------- ADD TOOL TO SECTIONS ---------- #}
+		{%- set ns.sections = ns.sections + [ns_tool.content_lines | join('\n')] -%}
+	{%- endfor -%}
+
+	{%- set ns.sections = ns.sections + ['} // namespace ' ~ namespace_name] -%}
+
+	{{- ns.sections | join('\n') -}}
+{%- endmacro -%}
+
+
+{# ----------- MESSAGE RENDERING HELPER FUNCTIONS ------------ #}
+
+{%- macro render_role_message(message, role=None) -%}
+    {%- if not role -%}
+        {%- set role = message["role"] -%}
+    {%- endif -%}
+
+    {%- set message_content = message['content'] or '' -%}
+    {%- if message_content is not string -%}
+        {%- set message_content = message_content | tojson(ensure_ascii=False) -%}
+    {%- endif -%}
+
+    {{- role + add_tokens.role_sep + message_content + add_tokens.message_sep -}}
+
+{%- endmacro -%}
+
+
+{%- macro render_function_call(message) -%}
+    {%- set call = message['content'] -%}
+    {%- if call.function -%}
+        {%- set call = call.function -%}
+    {%- endif -%}
+
+    {%- set arguments = call['arguments'] -%}
+	{%- if arguments is not string -%}
+		{%- set arguments = arguments| tojson(ensure_ascii=False) -%}
+	{%- endif -%}
+
+    {{- render_role_message(
+        {
+            'role': 'function call',
+            'content': '{"name": "' ~ call['name'] ~ '", "arguments": ' ~ arguments ~ '}'
+        }
+    ) -}}
+{%- endmacro -%}
+
+{# ----- SPECIAL TOKENS ----- #}
+
+{%- set add_tokens = namespace(
+    role_sep="<|role_sep|>\n", 
+    message_sep="<|message_sep|>\n\n"
+) -%}
+
+{# ----- DEFAULT DEVSYSTEM ----- #}
+
+{%- set DEVSYSTEM -%}
+<role_description>
+Description of the roles available in the dialog.
+
+`developer system`
+A message added by Sber before the main dialog. It has the highest priority and sets global, non-overridable conditions (for example, conversation rules, the safety policy, the assistant's overall response style, etc.).
+
+`system`
+A system instruction added by developers or by the user, but with a lower priority than `developer system`. It usually describes the assistant's instructions, a specific response style, and other conditions for this particular dialog.
+
+`user`
+A message or request from the user. The assistant follows it if it does not conflict with higher-priority instructions (see <instruction_priority>).
+
+`user memory`
+A sequence of the most up-to-date long-term facts about the user at the time of their request, presented as a JSON list of strings. Facts are listed in chronological order, meaning newer facts are appended to the end of the sequence. When facts are changed or deleted, records of previous facts remain in the sequence. The assistant saves facts using a function and uses them in accordance with the <memory_guidelines> block below.
+
+`added files`
+Metadata about files available for use in the dialog, presented in JSON format. It contains the following keys: id (a unique file identifier), name (file name), type (file type).
+
+`assistant`
+The assistant's reply to the user's request. If the system instruction or the user does not set additional rules for `assistant`, this reply must comply with the instructions in the <assistant_guidelines> block below. The list of functions available to call is contained in `function descriptions`. The name of the required function and its arguments will be generated next by the `function call` role. In its replies, the assistant follows the instructions in accordance with <instruction_priority>.
+
+`function descriptions`
+Function descriptions in TypeScript format. A function is a special tool (or a set of instructions) that the assistant can call to perform specific actions, computations, or obtain data needed to solve the user's task. Each function description contains blocks with the name, description, and arguments. Sometimes the description contains separate blocks with return parameters and usage examples that illustrate the correct call and arguments.
+
+`function call`
+The function that `assistant` calls based on the dialog context, and its arguments. The function is invoked in strict accordance with the instructions in the <function_usage> block.
+
+`function result`
+The result of the last function call.
+</role_description>
+
+<available_modalities>
+The assistant can work with the following modalities: text, available functions.
+</available_modalities>
+
+<instruction_priority>
+If instructions from different roles conflict within the dialog context, observe the following priorities:  
+`developer system` > `system` > `user` > `function descriptions` > `function result` > `user memory`
+</instruction_priority>
+
+<function_usage>
+Basic instructions for working with functions.
+
+Only call those functions that are described in `function descriptions`.
+
+Call available functions when, according to their description, such a call will help provide a more complete and/or accurate answer to the user's request. Fill in function arguments using information from the dialog context. If a function could help answer the request but a required argument is missing from the context, ask the user for the missing data before calling the function. If a necessary function is unavailable or an error occurs, briefly inform the user and, if possible, suggest an alternative.
+</function_usage>
+
+<memory_guidelines>
+Rules for using facts in long-term memory:
+
+If there is no message under the `user memory` role in the dialog, this is equivalent to the absence of long-term facts about the user in memory. In that case, information about the user is limited to the current dialog, and no new facts should be saved.
+</memory_guidelines>
+
+<assistant_guidelines>
+You are a helpful assistant.
+
+# Instructions
+- Strictly follow the instruction priority.
+- Maintain a logical chain of reasoning when answering the user's question.
+- For complex questions (for example, STEM), try to answer in detail unless the system message or dialog context limits the response length.
+- Be helpful, truthful, and avoid unsafe or prohibited content in your responses.
+- Try to reply in the language in which the user asked their question.
+</assistant_guidelines>
+
+A dialog will follow below.
+The dialog may include various roles described in the <role_description> block.
+Each turn begins with the role name and a special token that marks the end of the role's full name, and ends with a special end-of-turn token.
+Your task is to continue the dialog from the last specified role in accordance with the dialog context.
+{%- endset -%}
+
+
+{#- ---------------------- RENDERING STARTS HERE ---------------------- -#}
+
+
+{# ----- RENDER BOS TOKEN ----- #}
+{{- bos_token -}}
+
+
+{# ----- RENDER DEVSYSTEM ----- #}
+{{- render_role_message({"role": "developer system", "content": DEVSYSTEM}) -}}
+
+{# ----- RENDER SYSTEM IF PRESENT ----- #}
+{%- if messages and messages[0]['role'] == 'system' -%}
+    {{- render_role_message(messages[0]) -}}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+
+{# ----- RENDER TOOLS ----- #}
+{%- if tools -%}
+    {%- set tools_content = (
+        render_tool_namespace('functions', tools) 
+        + "\n\n"
+    ) -%}
+    {{- render_role_message({'role': 'function descriptions', 'content': tools_content}) -}}
+{%- endif -%}
+
+{# ----- MAIN MESSAGE LOOP ----- #}
+{%- for message in messages -%}
+
+    {# ----- TOOL MESSAGE -------#}
+    {%- if message['role'] == 'tool' -%}
+        {{- render_role_message(message, 'function result') -}}
+
+
+    {# ----- ASSISTANT MESSAGE ----- #}
+    {%- elif message['role'] == 'assistant' -%}
+
+        {# ----- FUNCTION CALL PART CHECKING: SINGLE CALL SETUP ----- #}
+        {%- if message.tool_calls is defined and message.tool_calls -%}
+            {%- set function_call = message.tool_calls[0] -%}
+        {%- else -%}
+            {%- set function_call = None -%}
+        {%- endif -%}
+
+        {# ----- MAIN ASSISTANT RENDERING ----- #}
+
+        {{- render_role_message({'role': 'assistant', 'content': message.content}) -}}
+        {%- if function_call -%}
+            {{- render_function_call({'role': 'function call', 'content': function_call}) -}}
+        {%- endif -%}
+
+        
+    {# ----- OTHER MESSAGES ----- #}
+    {%- else -%}
+        {{- render_role_message(message) -}}
+    {%- endif -%}
+
+    {# ----- ADDING GENERATION PROMPT ----- #}
+
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{- 'assistant' + add_tokens.role_sep -}}
+    {%- endif -%}
+
+{%- endfor -%}
\ No newline at end of file
diff --git a/models/templates/GigaChat3.1-10B-A1.8B.jinja b/models/templates/GigaChat3.1-10B-A1.8B.jinja
new file mode 100644
index 0000000000..0557754acf
--- /dev/null
+++ b/models/templates/GigaChat3.1-10B-A1.8B.jinja
@@ -0,0 +1,339 @@
+{#--------TOOL RENDERING FUNCTIONS---------#}
+
+{#---------------------------------------------------------------
+  Converts JSON Schema (dict) to a TypeScript type definition
+----------------------------------------------------------------#}
+{%- macro json_schema_to_typescript(schema, indent="") -%}
+	{%- set ADDITIONAL_JSON_KEYS = ['format', 'maxItems', 'maximum', 'minItems', 'minimum', 'pattern'] -%}
+	{%- set ty = schema.get("type") -%}
+
+	{# ---------------- OBJECT ---------------- #}
+	{%- if ty == "object" -%}
+		{{- "{\n" -}}
+
+		{# Start building property list #}
+		{%- set props = schema.get("properties", {}) -%}
+		{%- set required = schema.get("required", []) -%}
+		{%- set has_additional_props = schema.get("additionalProperties") is defined -%}
+		{%- set additional_props_type = none -%}
+		{%- if has_additional_props -%}
+			{%- if schema.additionalProperties == true -%}
+				{%- set additional_props_type = {'type': 'any'} -%}
+			{%- elif schema.additionalProperties is mapping -%}
+				{%- set additional_props_type = schema.additionalProperties -%}
+			{%- endif -%}
+		{%- endif -%}
+
+		{%- for key, val in props.items() -%}
+			{# ---------- Description Comments ---------- #}
+			{%- if "description" in val -%}
+				{%- for line in val['description'].split('\n') -%}
+					{%- if line.strip() -%}
+						{{- indent + '// ' + line + '\n' -}}
+					{%- endif -%}
+				{%- endfor -%}
+			{%- endif -%}
+
+			{# ---------- Additional JSON Keys ---------- #}
+			{%- for add_key, add_val in val.items() -%}
+				{%- if add_key in ADDITIONAL_JSON_KEYS -%}
+					{%- if add_val is string -%}
+						{{- indent + '// ' + add_key + ': "' + add_val + '"' + '\n'  -}}
+					{%- else -%}
+						{{- indent + '// ' + add_key + ': ' ~ add_val ~ '\n' -}}
+					{%- endif -%}
+				{%- endif -%}
+			{%- endfor -%}
+
+			{# ---------- Property Definition ---------- #}
+            {%- set type_str = json_schema_to_typescript(
+                val, 
+                indent + "  "
+            ) -%}
+
+			{{- indent + key + ('' if key in required else '?') + ': ' + type_str + ',' -}}
+
+			{%- if "default" in val or "defalut_value" in val -%}
+				{%- set default = val.get("default", val.get("defalut_value")) -%}
+				{%- if default is string -%}
+					{{- ' // default: "' + default + '"' -}}
+				{%- else -%}
+					{{- ' // default: ' ~ default -}}
+				{%- endif -%}
+			{%- endif -%}
+
+			{{- "\n" -}}
+		{%- endfor -%}
+
+		{# Handle additionalProperties as index signature #}
+		{%- if has_additional_props and additional_props_type is not none -%}
+			{%- set additional_type_str = json_schema_to_typescript(
+				additional_props_type,
+				indent + "  "
+			) -%}
+			{{- indent + '[key: string]: ' + additional_type_str + '\n' -}}
+		{%- endif -%}
+
+		{{- indent[: (indent|length - "  "|length) ] + '}' -}}
+
+	{# ---------------- STRING ---------------- #}
+	{%- elif ty == "string" -%}
+		{%- if schema.get("enum") -%}
+            {%- set ns = namespace(enum = []) -%}
+            {%- for en in schema['enum'] -%}
+                {%- set ns.enum = ns.enum + ['"' ~ en ~ '"'] -%}
+            {%- endfor -%}
+			{{- ns.enum | join(' | ') -}}
+		{%- elif schema.get("format", "none") in ['date-time', 'date'] -%}
+			{{- 'Date' -}}
+		{%- else -%}
+			{{- 'string' -}}
+		{%- endif -%}
+
+	{# ---------------- NUMBER / INTEGER ---------------- #}
+	{%- elif ty in ["number", "integer"] -%}
+		{%- if schema.get("enum") -%}
+			{{- schema.enum | join(' | ') -}}
+		{%- else -%}
+			{{- 'number' -}}
+		{%- endif -%}
+
+	{# ---------------- BOOLEAN ---------------- #}
+	{%- elif ty == "boolean" -%}
+		{{- 'boolean' -}}
+
+	{# ---------------- ARRAY ---------------- #}
+	{%- elif ty == "array" -%}
+		{%- if "items" in schema -%}
+			{{- json_schema_to_typescript(schema['items'], indent) + '[]' -}}
+		{%- else -%}
+			{{- 'Array<any>' -}}
+		{%- endif -%}
+
+	{# ---------------- FALLBACK ---------------- #}
+	{%- else -%}
+		{{- 'any' -}}
+	{%- endif -%}
+{%- endmacro -%}
+
+{#---------------------------------------------------------------
+  Renders a namespace and its tool definitions in TypeScript style
+----------------------------------------------------------------#}
+
+{%- macro render_tool_namespace(namespace_name, tools) -%}
+	{%- set ns = namespace(sections = ['namespace ' ~ namespace_name ~ ' {']) -%}
+
+	{%- for tool in tools -%}
+		{%- if tool.function -%}
+			{%- set tool = tool.function -%}
+		{%- endif -%}
+
+		{%- set ns_tool = namespace(content_lines=[]) -%}
+
+		{# ---------- TOOL DESCRIPTION ---------- #}
+		{%- if tool.get('description') -%}
+			{%- for line in tool['description'].split('\n') -%}
+				{%- if line.strip() -%}
+					{%- set ns_tool.content_lines = ns_tool.content_lines + ['// ' ~ line] -%}
+				{%- endif -%}
+			{%- endfor -%}
+		{%- endif -%}
+
+		{# ---------- TOOL SIGNATURE ---------- #}
+		{%- set main_body = "" -%}
+		{%- set params = tool.get("parameters") -%}
+		{%- if params and params.get("properties") -%}
+			{%- set param_type = json_schema_to_typescript(params, "  ") -%}
+			{%- set main_body = 'type ' ~ tool.name ~ ' = (_: ' ~ param_type ~ ') => ' -%}
+		{%- else -%}
+			{%- set main_body = 'type ' ~ tool.name ~ ' = () => ' -%}
+		{%- endif -%}
+
+		{# ---------- RETURN TYPE ---------- #}
+		{%- set return_params = tool.get("return_parameters") -%}
+		{%- if return_params and return_params.get("properties") -%}
+			{%- set return_type = json_schema_to_typescript(return_params, "  ") -%}
+			{%- set main_body = main_body ~ return_type -%}
+		{%- else -%}
+			{%- set main_body = main_body ~ 'any' -%}
+		{%- endif -%}
+
+		{%- set main_body = main_body ~ ';\n' -%}
+
+		{%- set ns_tool.content_lines = ns_tool.content_lines + [main_body] -%}
+
+		{# ---------- ADD TOOL TO SECTIONS ---------- #}
+		{%- set ns.sections = ns.sections + [ns_tool.content_lines | join('\n')] -%}
+	{%- endfor -%}
+
+	{%- set ns.sections = ns.sections + ['} // namespace ' ~ namespace_name] -%}
+
+	{{- ns.sections | join('\n') -}}
+{%- endmacro -%}
+
+
+{# ----------- MESSAGE RENDERING HELPER FUNCTIONS ------------ #}
+
+{%- macro render_function_call(call) -%}
+    {%- if call.function -%}
+        {%- set call = call.function -%}
+    {%- endif -%}
+
+    {%- set arguments = call['arguments'] -%}
+	{%- if arguments is not string -%}
+		{%- set arguments = arguments| tojson(ensure_ascii=False) -%}
+	{%- endif -%}
+
+    {{- '{"name": "' ~ call['name'] ~ '", "arguments": ' ~ arguments ~ '}' -}}
+{%- endmacro -%}
+
+
+{%- macro render_role_message(message, role=None) -%}
+    {%- if not role -%}
+        {%- set role = message["role"] -%}
+    {%- endif -%}
+
+    {%- set message_content = message['content'] or '' -%}
+    {%- if message_content is not string -%}
+        {%- set message_content = message_content | tojson(ensure_ascii=False) -%}
+    {%- endif -%}
+
+    {{- role + add_tokens.role_sep + message_content -}}
+
+    {%- if message.tool_calls is defined and message.tool_calls -%}
+        {{- add_tokens.function_call + render_function_call(message.tool_calls[0]) -}}
+    {%- endif -%}
+
+    {{- add_tokens.message_sep -}}
+
+{%- endmacro -%}
+
+
+
+{# ----- SPECIAL TOKENS ----- #}
+
+{%- set add_tokens = namespace(
+    role_sep="<|role_sep|>\n", 
+    message_sep="<|message_sep|>\n\n",
+    function_call="<|function_call|>"
+) -%}
+
+{# ----- DEFAULT DEVSYSTEM ----- #}
+
+{%- set DEVSYSTEM -%}
+<role_description>
+Description of the roles available in the dialog.
+
+`developer system`
+A message added by Sber before the main dialog. It has the highest priority and sets global, non-overridable conditions (for example, conversation rules, the safety policy, the assistant's overall response style, etc.).
+
+`system`
+A system instruction added by developers or by the user, but with a lower priority than `developer system`. It usually describes the assistant's instructions, a specific response style, and other conditions for this particular dialog.
+
+`user`
+A message or request from the user. The assistant follows it if it does not conflict with higher-priority instructions (see <instruction_priority>).
+
+`user memory`
+A sequence of the most up-to-date long-term facts about the user at the time of their request, presented as a JSON list of strings. Facts are listed in chronological order, meaning newer facts are appended to the end of the sequence. When facts are changed or deleted, records of previous facts remain in the sequence. The assistant saves facts using a function and uses them in accordance with the <memory_guidelines> block below.
+
+`added files`
+Metadata about files available for use in the dialog, presented in JSON format. It contains the following keys: id (a unique file identifier), name (file name), type (file type).
+
+`assistant`
+The assistant's reply to the user's request. If the system instruction or the user does not set additional rules for `assistant`, this reply must comply with the instructions in the <assistant_guidelines> block below. The list of functions available to call is contained in `function descriptions`. The name of the required function and its arguments will be generated next by the `function call` role. In its replies, the assistant follows the instructions in accordance with <instruction_priority>.
+
+`function descriptions`
+Function descriptions in TypeScript format. A function is a special tool (or a set of instructions) that the assistant can call to perform specific actions, computations, or obtain data needed to solve the user's task. Each function description contains blocks with the name, description, and arguments. Sometimes the description contains separate blocks with return parameters and usage examples that illustrate the correct call and arguments.
+
+`function call`
+The function that `assistant` calls based on the dialog context, and its arguments. The function is invoked in strict accordance with the instructions in the <function_usage> block.
+
+`function result`
+The result of the last function call.
+</role_description>
+
+<available_modalities>
+The assistant can work with the following modalities: text, available functions.
+</available_modalities>
+
+<instruction_priority>
+If instructions from different roles conflict within the dialog context, observe the following priorities:  
+`developer system` > `system` > `user` > `function descriptions` > `function result` > `user memory`
+</instruction_priority>
+
+<function_usage>
+Basic instructions for working with functions.
+
+Only call those functions that are described in `function descriptions`.
+
+Call available functions when, according to their description, such a call will help provide a more complete and/or accurate answer to the user's request. Fill in function arguments using information from the dialog context. If a function could help answer the request but a required argument is missing from the context, ask the user for the missing data before calling the function. If a necessary function is unavailable or an error occurs, briefly inform the user and, if possible, suggest an alternative.
+</function_usage>
+
+<memory_guidelines>
+Rules for using facts in long-term memory:
+
+If there is no message under the `user memory` role in the dialog, this is equivalent to the absence of long-term facts about the user in memory. In that case, information about the user is limited to the current dialog, and no new facts should be saved.
+</memory_guidelines>
+
+<assistant_guidelines>
+You are a helpful assistant.
+
+# Instructions
+- Strictly follow the instruction priority.
+- Maintain a logical chain of reasoning when answering the user's question.
+- For complex questions (for example, STEM), try to answer in detail unless the system message or dialog context limits the response length.
+- Be helpful, truthful, and avoid unsafe or prohibited content in your responses.
+- Try to reply in the language in which the user asked their question.
+</assistant_guidelines>
+
+A dialog will follow below.
+The dialog may include various roles described in the <role_description> block.
+Each turn begins with the role name and a special token that marks the end of the role's full name, and ends with a special end-of-turn token.
+Your task is to continue the dialog from the last specified role in accordance with the dialog context.
+{%- endset -%}
+
+
+{#- ---------------------- RENDERING STARTS HERE ---------------------- -#}
+
+
+{# ----- RENDER BOS TOKEN ----- #}
+{{- bos_token -}}
+
+
+{# ----- RENDER DEVSYSTEM ----- #}
+{{- render_role_message({"role": "developer system", "content": DEVSYSTEM}) -}}
+
+{# ----- RENDER SYSTEM IF PRESENT ----- #}
+{%- if messages and messages[0]['role'] == 'system' -%}
+    {{- render_role_message(messages[0]) -}}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+
+{# ----- RENDER TOOLS ----- #}
+{%- if tools -%}
+    {%- set tools_content = (
+        render_tool_namespace('functions', tools) 
+        + "\n\n"
+    ) -%}
+    {{- render_role_message({'role': 'function descriptions', 'content': tools_content}) -}}
+{%- endif -%}
+
+{# ----- MAIN MESSAGE LOOP ----- #}
+{%- for message in messages -%}
+
+    {# ----- TOOL MESSAGE -------#}
+    {%- if message['role'] == 'tool' -%}
+        {{- render_role_message(message, 'function result') -}}
+        
+    {# ----- OTHER MESSAGES ----- #}
+    {%- else -%}
+        {{- render_role_message(message) -}}
+    {%- endif -%}
+
+    {# ----- ADDING GENERATION PROMPT ----- #}
+
+    {%- if loop.last and add_generation_prompt and message['role'] != 'assistant' -%}
+        {{- 'assistant' + add_tokens.role_sep -}}
+    {%- endif -%}
+
+{%- endfor -%}
\ No newline at end of file
diff --git a/models/templates/HuggingFaceTB-SmolLM3-3B.jinja b/models/templates/HuggingFaceTB-SmolLM3-3B.jinja
new file mode 100644
index 0000000000..b605d93439
--- /dev/null
+++ b/models/templates/HuggingFaceTB-SmolLM3-3B.jinja
@@ -0,0 +1,61 @@
+{#- Copyright 2025-present the Unsloth team. All rights reserved. #}
+{#- Licensed under the Apache License, Version 2.0 (the "License") #}
+{#- Edits made by Unsloth to make it work for most inference engines #}
+{# ───── defaults ───── #}
+{%- if enable_thinking is not defined -%}
+    {%- set enable_thinking = true -%}
+{%- endif -%}
+{# ───── reasoning mode ───── #}
+{%- if enable_thinking -%}
+    {%- set reasoning_mode = "/think" -%}
+{%- else -%}
+    {%- set reasoning_mode = "/no_think" -%}
+{%- endif -%}
+{# ───── header (system message) ───── #}
+{{- "<|im_start|>system\n" -}}
+{%- if messages[0].role == "system" -%}
+    {%- set system_message = messages[0].content -%}
+    {%- if "/no_think" in system_message -%}
+        {%- set reasoning_mode = "/no_think" -%}
+    {%- elif "/think" in system_message -%}
+        {%- set reasoning_mode = "/think" -%}
+    {%- endif -%}
+    {%- set custom_instructions = system_message.replace("/no_think", "") -%}
+    {%- set custom_instructions = custom_instructions.replace("/think", "") -%}
+    {%- set custom_instructions = custom_instructions.rstrip() -%}
+{%- endif -%}
+{{- "## Metadata\n\n" -}}
+{{- "Knowledge Cutoff Date: June 2025\n" -}}
+{{- "Reasoning Mode: " + reasoning_mode + "\n\n" -}}
+{{- "## Custom Instructions\n\n" -}}
+{%- if custom_instructions -%}
+    {{- custom_instructions + "\n\n" -}}
+{%- elif reasoning_mode == "/think" -%}
+    {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face. Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracking, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> Thought section </think> Solution section. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion.\n\n" -}}
+{%- else -%}
+    {{- "You are a helpful AI assistant named SmolLM, trained by Hugging Face.\n\n" -}}
+{%- endif -%}
+{{- "<|im_end|>\n" -}}
+{# ───── main loop ───── #}
+{%- for message in messages -%}
+    {%- set content = message.content if message.content is string else "" -%}
+    {%- if message.role == "user" -%}
+        {{ "<|im_start|>" + message.role + "\n" + content + "<|im_end|>\n" }}
+    {%- elif message.role == "assistant" -%}
+        {%- if reasoning_mode == "/think" -%}
+            {{ "<|im_start|>assistant\n" + content.lstrip("\n") + "<|im_end|>\n" }}
+        {%- else -%}
+            {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" + content.lstrip("\n") + "<|im_end|>\n" }}
+        {%- endif -%}
+    {%- elif message.role == "tool" -%}
+        {{ "<|im_start|>" + "user\n" + content + "<|im_end|>\n" }}
+    {%- endif -%}
+{%- endfor -%}
+{# ───── generation prompt ───── #}
+{%- if add_generation_prompt -%}
+    {%- if reasoning_mode == "/think" -%}
+        {{ "<|im_start|>assistant\n" }}
+    {%- else -%}
+        {{ "<|im_start|>assistant\n" + "<think>\n\n</think>\n" }}
+    {%- endif -%}
+{%- endif -%}
diff --git a/models/templates/LFM2-8B-A1B.jinja b/models/templates/LFM2-8B-A1B.jinja
new file mode 100644
index 0000000000..fab22e952b
--- /dev/null
+++ b/models/templates/LFM2-8B-A1B.jinja
@@ -0,0 +1,37 @@
+{{- bos_token -}}
+{%- set system_prompt = "" -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+	{%- set ns.system_prompt = messages[0]["content"] -%}
+	{%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+	{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%}
+	{%- for tool in tools -%}
+		{%- if tool is not string -%}
+			{%- set tool = tool | tojson -%}
+		{%- endif -%}
+		{%- set ns.system_prompt = ns.system_prompt + tool -%}
+		{%- if not loop.last -%}
+			{%- set ns.system_prompt = ns.system_prompt + ", " -%}
+		{%- endif -%}
+	{%- endfor -%}
+	{%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+	{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+	{{- "<|im_start|>" + message["role"] + "\n" -}}
+	{%- set content = message["content"] -%}
+	{%- if content is not string -%}
+		{%- set content = content | tojson -%}
+	{%- endif -%}
+	{%- if message["role"] == "tool" -%}
+		{%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%}
+	{%- endif -%}
+	{{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+	{{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/LFM2.5-Instruct.jinja b/models/templates/LFM2.5-Instruct.jinja
new file mode 100644
index 0000000000..7778756dd9
--- /dev/null
+++ b/models/templates/LFM2.5-Instruct.jinja
@@ -0,0 +1,45 @@
+{{- bos_token -}}
+{%- set keep_past_thinking = keep_past_thinking | default(false) -%}
+{%- set ns = namespace(system_prompt="") -%}
+{%- if messages[0]["role"] == "system" -%}
+    {%- set ns.system_prompt = messages[0]["content"] -%}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
+    {%- for tool in tools -%}
+        {%- if tool is not string -%}
+            {%- set tool = tool | tojson -%}
+        {%- endif -%}
+        {%- set ns.system_prompt = ns.system_prompt + tool -%}
+        {%- if not loop.last -%}
+            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- set ns.last_assistant_index = -1 -%}
+{%- for message in messages -%}
+    {%- if message["role"] == "assistant" -%}
+        {%- set ns.last_assistant_index = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+{%- for message in messages -%}
+    {{- "<|im_start|>" + message["role"] + "\n" -}}
+    {%- set content = message["content"] -%}
+    {%- if content is not string -%}
+        {%- set content = content | tojson -%}
+    {%- endif -%}
+    {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
+        {%- if "</think>" in content -%}
+            {%- set content = content.split("</think>")[-1] | trim -%}
+        {%- endif -%}
+    {%- endif -%}
+    {{- content + "<|im_end|>\n" -}}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
\ No newline at end of file
diff --git a/models/templates/Qwen-QwQ-32B.jinja b/models/templates/Qwen-QwQ-32B.jinja
index d475f70687..ce314a039f 100644
--- a/models/templates/Qwen-QwQ-32B.jinja
+++ b/models/templates/Qwen-QwQ-32B.jinja
@@ -59,4 +59,5 @@
 {%- endfor %}
 {%- if add_generation_prompt %}
     {{- '<|im_start|>assistant\n<think>\n' }}
+    {%- if not enable_thinking -%}{{- '</think>' -}}{%- endif -%}
 {%- endif %}
diff --git a/models/templates/Qwen3-Coder.jinja b/models/templates/Qwen3-Coder.jinja
index 49b0e8d0ee..cde8c0e43d 100644
--- a/models/templates/Qwen3-Coder.jinja
+++ b/models/templates/Qwen3-Coder.jinja
@@ -29,7 +29,7 @@
     {%- endif %}
 {%- endif %}
 {%- if tools is iterable and tools | length > 0 %}
-    {{- "\n\n# Tools\n\nYou have access to the following functions:\n\n" }}
+    {{- "\n\n# Tools\n\nYou have access to the following tools:\n\n" }}
     {{- "<tools>" }}
     {%- for tool in tools %}
         {%- if tool.function is defined %}
@@ -63,7 +63,7 @@
         {{- '\n</function>' }}
     {%- endfor %}
     {{- "\n</tools>" }}
-    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {{- '\n\nIf you choose to call a tool ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nvalue_2\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: the tool calling block MUST begin with an opening <tool_call> tag and end with a closing </tool_call> tag.\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
 {%- endif %}
 {%- if system_message is defined %}
     {{- '<|im_end|>\n' }}
diff --git a/models/templates/Qwen3.5-4B.jinja b/models/templates/Qwen3.5-4B.jinja
new file mode 100644
index 0000000000..a585dec894
--- /dev/null
+++ b/models/templates/Qwen3.5-4B.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is defined %}
+                    {%- for args_name, args_value in tool_call.arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- else %}
+        {{- '<think>\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/StepFun3.5-Flash.jinja b/models/templates/StepFun3.5-Flash.jinja
new file mode 100644
index 0000000000..c09ea497da
--- /dev/null
+++ b/models/templates/StepFun3.5-Flash.jinja
@@ -0,0 +1,80 @@
+{% macro render_content(content) %}{% if content is none %}{{- '' }}{% elif content is string %}{{- content }}{% elif content is mapping %}{{- content['value'] if 'value' in content else content['text'] }}{% elif content is iterable %}{% for item in content %}{% if item.type == 'text' %}{{- item['value'] if 'value' in item else item['text'] }}{% elif item.type == 'image' %}<im_patch>{% endif %}{% endfor %}{% endif %}{% endmacro %}
+{{bos_token}}{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- render_content(messages[0].content) + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou have access to the following functions in JSONSchema format:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson(ensure_ascii=False) }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...>\n...\n</function> block must be nested within <tool_call>\n...\n</tool_call> XML tags\n- Required parameters MUST be specified\n</IMPORTANT><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + render_content(messages[0].content) + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and render_content(message.content) is string and not(render_content(message.content).startswith('<tool_response>') and render_content(message.content).endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content) %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {%- set role_name = 'observation' if (message.role == "system" and not loop.first and message.name == 'observation') else message.role %}
+        {{- '<|im_start|>' + role_name + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = render_content(message.reasoning_content) %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- else %}
+                {%- set reasoning_content = '' %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- if tool_call.arguments is defined %}
+                    {%- set arguments = tool_call.arguments %}
+                    {%- for args_name, args_value in arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson(ensure_ascii=False) | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>tool_response\n' }}
+        {%- endif %}
+        {{- '<tool_response>' }}
+        {{- content }}
+        {{- '</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n<think>\n' }}
+{%- endif %}
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
index c2066bd739..299f7a7ff1 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
@@ -1 +1,44 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is none -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
index c2066bd739..7349ce9eca 100644
--- a/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
@@ -1 +1,47 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<｜tool▁call▁end｜>'}}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<｜tool▁outputs▁end｜>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<｜Assistant｜><think>\n'}}{% endif %}
\ No newline at end of file
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- set ns.system_prompt = message['content'] -%}
+  {%- endif -%}
+{%- endfor -%}{{bos_token}}{{ns.system_prompt}}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls']-%}
+      {%- if not ns.is_first -%}
+        {{'<｜Assistant｜><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+        {%- set ns.is_first = true -%}
+      {%- else -%}
+        {{'\n' + '<｜tool▁call▁begin｜>' + tool['type'] + '<｜tool▁sep｜>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] | tojson + '\n' + '```' + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}
+    {{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['content'] is not none -%}
+    {%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>' + message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+    {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>')[-1] -%}
+      {%- endif -%}{{'<｜Assistant｜>' + content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_tool = true -%}
+    {%- if ns.is_output_first -%}{{'<｜tool▁outputs▁begin｜><｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+      {%- set ns.is_output_first = false -%}
+      {%- else -%}{{'\n<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if ns.is_tool -%}{{'<｜tool▁outputs▁end｜>'}}
+{%- endif -%}
+{%- if add_generation_prompt and not ns.is_tool -%}{{'<｜Assistant｜><think>\n'}}{% if not enable_thinking %}{{- '</think>' -}}{% endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
index e5656196a3..e987c2a16e 100644
--- a/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
+++ b/models/templates/deepseek-ai-DeepSeek-V3.1.jinja
@@ -1,3 +1,71 @@
-{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '
+{% if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+  {%- if enable_thinking is defined -%}
+    {%- set thinking = enable_thinking -%}
+    {%- else -%}
+    {%- set thinking = false -%}
+  {%- endif -%}
+{%- endif -%}
+{%- set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- if ns.is_first_sp -%}
+      {%- set ns.system_prompt = ns.system_prompt + message['content'] -%}
+      {%- set ns.is_first_sp = false -%}
+      {%- else -%}
+      {%- set ns.system_prompt = ns.system_prompt + '
 
-' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<｜Assistant｜></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- else %}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] + '<｜tool▁call▁end｜>'}}{%- endif %}{%- endfor %}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<｜Assistant｜>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}}  {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<｜end▁of▁sentence｜>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<｜end▁of▁sentence｜>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<｜Assistant｜>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}
\ No newline at end of file
+' + message['content'] -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {%- set ns.is_tool = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_last_user = true -%}{{'<｜User｜>' + message['content']}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and message['tool_calls'] -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜><think></think>'}}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_first = false -%}
+    {%- set ns.is_tool = false -%}
+    {%- for tool in message['tool_calls'] -%}
+      {%- if not ns.is_first -%}
+        {%- if not message['content'] -%}{{'<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] | tojson + '<｜tool▁call▁end｜>'}}
+          {%- else -%}{{message['content'] + '<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>' + tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] | tojson + '<｜tool▁call▁end｜>'}}
+        {%- endif -%}
+        {%- set ns.is_first = true -%}
+        {%- else -%}{{'<｜tool▁call▁begin｜>'+ tool['function']['name'] + '<｜tool▁sep｜>' + tool['function']['arguments'] | tojson + '<｜tool▁call▁end｜>'}}
+      {%- endif -%}
+    {%- endfor -%}{{'<｜tool▁calls▁end｜><｜end▁of▁sentence｜>'}}
+  {%- endif -%}
+  {%- if message['role'] == 'assistant' and not message['tool_calls'] -%}
+    {%- if ns.is_last_user -%}{{'<｜Assistant｜>'}}
+      {%- if message['prefix'] is defined and message['prefix'] and thinking -%}{{'<think>'}}
+        {%- else -%}{{'<think></think>'}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- set ns.is_last_user = false -%}
+    {%- if ns.is_tool -%}{{message['content'] + '<｜end▁of▁sentence｜>'}}
+      {%- set ns.is_tool = false -%}
+      {%- else -%}
+      {%- set content = message['content'] -%}
+      {%- if '</think>' in content -%}
+        {%- set content = content.split('</think>', 1)[1] -%}
+      {%- endif -%}{{content + '<｜end▁of▁sentence｜>'}}
+    {%- endif -%}
+  {%- endif -%}
+  {%- if message['role'] == 'tool' -%}
+    {%- set ns.is_last_user = false -%}
+    {%- set ns.is_tool = true -%}{{'<｜tool▁output▁begin｜>' + message['content'] + '<｜tool▁output▁end｜>'}}
+  {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool -%}{{'<｜Assistant｜>'}}
+  {%- if not thinking -%}{{'<think></think>'}}
+  {%- else -%}{{'<think>'}}
+  {%- endif -%}
+{%- endif %}
\ No newline at end of file
diff --git a/models/templates/deepseek-ai-DeepSeek-V3.2.jinja b/models/templates/deepseek-ai-DeepSeek-V3.2.jinja
new file mode 100644
index 0000000000..98d56589f0
--- /dev/null
+++ b/models/templates/deepseek-ai-DeepSeek-V3.2.jinja
@@ -0,0 +1,141 @@
+{%- if not add_generation_prompt is defined -%}
+  {%- set add_generation_prompt = false -%}
+{%- endif -%}
+{%- if not thinking is defined -%}
+  {%- if enable_thinking is defined -%}
+    {%- set thinking = enable_thinking -%}
+  {%- else -%}
+    {%- set thinking = false -%}
+  {%- endif -%}
+{%- endif -%}
+{%- set dsml_token = '｜DSML｜' -%}
+{%- set thinking_start_token = '<think>' -%}
+{%- set thinking_end_token = '</think>' -%}
+{%- set tools_header = '## Tools\n\nYou have access to a set of tools you can use to answer the user\'s question.\nYou can invoke functions by writing a "<' + dsml_token + 'function_calls>" block like the following as part of your reply to the user:\n<' + dsml_token + 'function_calls>\n<' + dsml_token + 'invoke name="$FUNCTION_NAME">\n<' + dsml_token + 'parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</' + dsml_token + 'parameter>\n...\n</' + dsml_token + 'invoke>\n<' + dsml_token + 'invoke name="$FUNCTION_NAME2">\n...\n</' + dsml_token + 'invoke>\n</' + dsml_token + 'function_calls>\n\nString and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).\n\nIf the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:\n\n<' + dsml_token + 'function_calls>\n...\n</' + dsml_token + 'function_calls>\n\n<function_results>\n...\n</function_results>\n\n' + thinking_start_token + '...thinking about results' + thinking_end_token + '\n\nHere are the functions available in JSONSchema format:\n<functions>\n' -%}
+{%- set tools_footer = '</functions>\n' -%}
+{%- set ns = namespace(system_prompt='', is_first_sp=true) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'system' -%}
+    {%- if ns.is_first_sp -%}
+      {%- set ns.system_prompt = ns.system_prompt + (message['content'] or '') -%}
+      {%- set ns.is_first_sp = false -%}
+    {%- else -%}
+      {%- set ns.system_prompt = ns.system_prompt + '\n\n' + (message['content'] or '') -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if tools is defined and tools -%}
+  {%- set ts = namespace(schemas='') -%}
+  {%- for tool in tools -%}
+    {%- if tool['type'] == 'function' -%}
+      {%- set ts.schemas = ts.schemas + (tool['function'] | tojson) + '\n' -%}
+    {%- endif -%}
+  {%- endfor -%}
+  {%- if ns.system_prompt -%}
+    {%- set ns.system_prompt = ns.system_prompt + '\n\n' + tools_header + ts.schemas + tools_footer -%}
+  {%- else -%}
+    {%- set ns.system_prompt = tools_header + ts.schemas + tools_footer -%}
+  {%- endif -%}
+{%- endif -%}
+{{- bos_token -}}
+{{- ns.system_prompt -}}
+{%- set last_user_idx = namespace(value=-1) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' or message['role'] == 'developer' -%}
+    {%- set last_user_idx.value = loop.index0 -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- set state = namespace(pending_asst_marker=false, pending_tool_marker=false) -%}
+{%- for message in messages -%}
+  {%- if message['role'] == 'user' -%}
+    {{- '<｜User｜>' + (message['content'] or '') -}}
+    {%- set state.pending_asst_marker = true -%}
+    {%- set state.pending_tool_marker = false -%}
+  {%- elif message['role'] == 'assistant' -%}
+    {%- set is_after_last_user = loop.index0 > last_user_idx.value -%}
+    {%- if state.pending_asst_marker -%}
+      {{- '<｜Assistant｜>' -}}
+      {%- if is_after_last_user and thinking -%}
+        {{- thinking_start_token -}}
+        {%- if message['reasoning_content'] is defined and message['reasoning_content'] -%}
+          {{- message['reasoning_content'] -}}
+        {%- endif -%}
+        {{- thinking_end_token -}}
+      {%- else -%}
+        {{- thinking_end_token -}}
+      {%- endif -%}
+    {%- elif state.pending_tool_marker -%}
+      {%- if is_after_last_user and thinking -%}
+        {{- '\n\n' + thinking_start_token -}}
+        {%- if message['reasoning_content'] is defined and message['reasoning_content'] -%}
+          {{- message['reasoning_content'] -}}
+        {%- endif -%}
+        {{- thinking_end_token -}}
+      {%- else -%}
+        {{- '\n\n' + thinking_end_token -}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- set state.pending_asst_marker = false -%}
+    {%- set state.pending_tool_marker = false -%}
+    {%- if message['content'] is defined and message['content'] -%}
+      {{- message['content'] -}}
+    {%- endif -%}
+    {%- if message['tool_calls'] -%}
+      {{- '\n\n<' + dsml_token + 'function_calls>\n' -}}
+      {%- for tool in message['tool_calls'] -%}
+        {%- set func = tool['function'] -%}
+        {{- '<' + dsml_token + 'invoke name="' + func['name'] + '">\n' -}}
+        {%- set args = func['arguments'] -%}
+        {%- if args is string -%}
+          {%- set args = args | from_json -%}
+        {%- endif -%}
+        {%- for key, val in args.items() -%}
+          {%- if val is string -%}
+            {{- '<' + dsml_token + 'parameter name="' + key + '" string="true">' + val + '</' + dsml_token + 'parameter>\n' -}}
+          {%- else -%}
+            {{- '<' + dsml_token + 'parameter name="' + key + '" string="false">' + (val | tojson) + '</' + dsml_token + 'parameter>\n' -}}
+          {%- endif -%}
+        {%- endfor -%}
+        {{- '</' + dsml_token + 'invoke>\n' -}}
+      {%- endfor -%}
+      {{- '</' + dsml_token + 'function_calls>' -}}
+    {%- endif -%}
+    {{- '<｜end▁of▁sentence｜>' -}}
+  {%- elif message['role'] == 'tool' -%}
+    {%- set outer_index = loop.index0 -%}
+    {%- set assistant_idx = namespace(value=-1) -%}
+    {%- for prev_msg in messages -%}
+      {%- if prev_msg['role'] == 'assistant' and prev_msg['tool_calls'] and loop.index0 < outer_index -%}
+        {%- set assistant_idx.value = loop.index0 -%}
+      {%- endif -%}
+    {%- endfor -%}
+    {%- set call_order = outer_index - assistant_idx.value -%}
+    {%- set assistant_msg = messages[assistant_idx.value] -%}
+    {%- set tool_call_count = assistant_msg['tool_calls'] | length -%}
+    {%- if call_order == 1 -%}
+      {{- '\n\n<function_results>' -}}
+    {%- endif -%}
+    {{- '\n<result>' + (message['content'] or '') + '</result>' -}}
+    {%- if call_order == tool_call_count -%}
+      {{- '\n</function_results>' -}}
+      {%- set state.pending_asst_marker = false -%}
+      {%- set state.pending_tool_marker = true -%}
+    {%- endif -%}
+  {%- endif -%}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+  {%- if state.pending_asst_marker -%}
+    {{- '<｜Assistant｜>' -}}
+    {%- if thinking -%}
+      {{- thinking_start_token -}}
+    {%- else -%}
+      {{- thinking_start_token + thinking_end_token -}}
+    {%- endif -%}
+  {%- elif state.pending_tool_marker -%}
+    {%- if thinking -%}
+      {{- '\n\n' + thinking_start_token -}}
+    {%- else -%}
+      {{- '\n\n' + thinking_start_token + thinking_end_token -}}
+    {%- endif -%}
+  {%- endif -%}
+{%- endif -%}
diff --git a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
index 9b8136df73..b94cfd4d9b 100644
--- a/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
+++ b/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja
@@ -46,7 +46,7 @@ Available functions as JSON spec:
     {%- if 'tool_calls' in message and message['tool_calls'] -%}
       {%- set tool = namespace(calls=[]) -%}
       {%- for call in message['tool_calls'] -%}
-        {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments'] + '}'] -%}
+        {%- set tool.calls = tool.calls + ['{"name": "' + call['function']['name'] + '", "arguments": ' + call['function']['arguments']|tojson + '}'] -%}
       {%- endfor -%}
       {%- set ns.content = ns.content + ' functools[' + tool.calls | join(', ') + ']' -%}
     {%- endif -%}
diff --git a/models/templates/google-gemma-4-31B-it-interleaved.jinja b/models/templates/google-gemma-4-31B-it-interleaved.jinja
new file mode 100644
index 0000000000..85791c4fe5
--- /dev/null
+++ b/models/templates/google-gemma-4-31B-it-interleaved.jinja
@@ -0,0 +1,282 @@
+{%- macro format_parameters(properties, required) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- set add_comma = false -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'OBJECT' -%}
+                ,properties:{
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                {%- elif value is mapping -%}
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                {%- endif -%}
+                }
+                {%- if value['required'] -%}
+                    ,required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    ,items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(prev_message_type=None, last_user_message=-1) -%}
+{%- set loop_messages = messages -%}
+{{- bos_token -}}
+{#- Handle System/Tool Definitions Block -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>\n' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {{- messages[0]['content'] | trim -}}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+
+    {{- '<turn|>\n' -}}
+{%- endif %}
+
+{#- Find last user message -#}
+{%- for message in loop_messages -%}
+    {%- if message['role'] == 'user' -%}
+        {%- set ns.last_user_message = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Loop through messages -#}
+{%- for message in loop_messages -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+        {%- if not (ns.prev_message_type == 'tool_response' and message['tool_calls']) -%}
+        {{- '<|turn>' + role + '\n' }}
+        {%- endif -%}
+
+        {%- set ns.prev_message_type = None -%}
+
+            {%- if message['tool_calls'] -%}
+                {#- Preserve reasoning between tool calls for model turns that come after the last user turn -#} 
+                {%- if message['reasoning_content'] and loop.index0 > ns.last_user_message -%}
+                  {{- '<|channel>thought\n' -}}
+                  {{- message['reasoning_content'] -}}
+                  {{- '<channel|>' -}}
+                {%- endif -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+
+            {%- if message['tool_responses'] -%}
+                {#- Tool Response handling -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- '<|tool_response>' -}}
+                    {%- if tool_response['response'] is mapping -%}
+                        {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}}
+                        {%- for key, value in tool_response['response'] | dictsort -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                            {%- if not loop.last %},{% endif -%}
+                        {%- endfor -%}
+                        {{- '}' -}}
+                    {%- else -%}
+                        {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}
+                    {%- endif -%}
+                    {{- '<tool_response|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_response' -%}
+            {%- endif -%}
+
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '<|image|>' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '<|video|>' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+        {%- if not (message['tool_responses'] and not message['content']) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' -%}
+        {{- '<|turn>model\n' -}}
+    {%- endif -%}
+    {%- if not enable_thinking | default(false) -%}
+        {{- '<|channel>thought\n<channel|>' -}}
+    {%- endif -%}
+{%- endif -%}
diff --git a/models/templates/google-gemma-4-31B-it.jinja b/models/templates/google-gemma-4-31B-it.jinja
new file mode 100644
index 0000000000..98da08eb6b
--- /dev/null
+++ b/models/templates/google-gemma-4-31B-it.jinja
@@ -0,0 +1,347 @@
+{%- macro format_parameters(properties, required) -%}
+    {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
+    {%- set ns = namespace(found_first=false) -%}
+    {%- for key, value in properties | dictsort -%}
+        {%- set add_comma = false -%}
+        {%- if key not in standard_keys -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {{ key }}:{
+            {%- if value['description'] -%}
+                description:<|"|>{{ value['description'] }}<|"|>
+                {%- set add_comma = true -%}
+            {%- endif -%}
+            {%- if value['type'] | upper == 'STRING' -%}
+                {%- if value['enum'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    enum:{{ format_argument(value['enum']) }}
+                {%- endif -%}
+            {%- elif value['type'] | upper == 'ARRAY' -%}
+                {%- if value['items'] is mapping and value['items'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    items:{
+                    {%- set ns_items = namespace(found_first=false) -%}
+                    {%- for item_key, item_value in value['items'] | dictsort -%}
+                        {%- if item_value is not none -%}
+                            {%- if ns_items.found_first %},{% endif -%}
+                            {%- set ns_items.found_first = true -%}
+                            {%- if item_key == 'properties' -%}
+                                properties:{
+                                {%- if item_value is mapping -%}
+                                    {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
+                                {%- endif -%}
+                                }
+                            {%- elif item_key == 'required' -%}
+                                required:[
+                                {%- for req_item in item_value -%}
+                                    <|"|>{{- req_item -}}<|"|>
+                                    {%- if not loop.last %},{% endif -%}
+                                {%- endfor -%}
+                                ]
+                            {%- elif item_key == 'type' -%}
+                                {%- if item_value is string -%}
+                                    type:{{ format_argument(item_value | upper) }}
+                                {%- else -%}
+                                    type:{{ format_argument(item_value | map('upper') | list) }}
+                                {%- endif -%}
+                            {%- else -%}
+                                {{ item_key }}:{{ format_argument(item_value) }}
+                            {%- endif -%}
+                        {%- endif -%}
+                    {%- endfor -%}
+                    }
+                {%- endif -%}
+            {%- endif -%}
+            {%- if value['nullable'] %}
+                {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                nullable:true
+            {%- endif -%}
+            {%- if value['type'] | upper == 'OBJECT' -%}
+                {%- if value['properties'] is defined and value['properties'] is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value['properties'], value['required'] | default([])) -}}
+                    }
+                {%- elif value is mapping -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    properties:{
+                    {{- format_parameters(value, value['required'] | default([])) -}}
+                    }
+                {%- endif -%}
+                {%- if value['required'] -%}
+                    {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+                    required:[
+                    {%- for item in value['required'] | default([]) -%}
+                        <|"|>{{- item -}}<|"|>
+                        {%- if not loop.last %},{% endif -%}
+                    {%- endfor -%}
+                    ]
+                {%- endif -%}
+            {%- endif -%}
+            {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
+            type:<|"|>{{ value['type'] | upper }}<|"|>}
+        {%- endif -%}
+    {%- endfor -%}
+{%- endmacro -%}
+{%- macro format_function_declaration(tool_data) -%}
+    declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
+    {%- set params = tool_data['function']['parameters'] -%}
+    {%- if params -%}
+        ,parameters:{
+        {%- if params['properties'] -%}
+            properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
+        {%- endif -%}
+        {%- if params['required'] -%}
+            required:[
+            {%- for item in params['required'] -%}
+                <|"|>{{- item -}}<|"|>
+                {{- ',' if not loop.last -}}
+            {%- endfor -%}
+            ],
+        {%- endif -%}
+        {%- if params['type'] -%}
+            type:<|"|>{{- params['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    {%- if 'response' in tool_data['function'] -%}
+        {%- set response_declaration = tool_data['function']['response'] -%}
+        ,response:{
+        {%- if response_declaration['description'] -%}
+            description:<|"|>{{- response_declaration['description'] -}}<|"|>,
+        {%- endif -%}
+        {%- if response_declaration['type'] | upper == 'OBJECT' -%}
+            type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
+        {%- endif -%}
+    {%- endif -%}
+    }
+{%- endmacro -%}
+{%- macro format_argument(argument, escape_keys=True) -%}
+    {%- if argument is string -%}
+        {{- '<|"|>' + argument + '<|"|>' -}}
+    {%- elif argument is boolean -%}
+        {{- 'true' if argument else 'false' -}}
+    {%- elif argument is mapping -%}
+        {{- '{' -}}
+        {%- set ns = namespace(found_first=false) -%}
+        {%- for key, value in argument | dictsort -%}
+            {%- if ns.found_first %},{% endif -%}
+            {%- set ns.found_first = true -%}
+            {%- if escape_keys -%}
+                {{- '<|"|>' + key + '<|"|>' -}}
+            {%- else -%}
+                {{- key -}}
+            {%- endif -%}
+            :{{- format_argument(value, escape_keys=escape_keys) -}}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- elif argument is sequence -%}
+        {{- '[' -}}
+        {%- for item in argument -%}
+            {{- format_argument(item, escape_keys=escape_keys) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- ']' -}}
+    {%- else -%}
+        {{- argument -}}
+    {%- endif -%}
+{%- endmacro -%}
+{%- macro strip_thinking(text) -%}
+    {%- set ns = namespace(result='') -%}
+    {%- for part in text.split('<channel|>') -%}
+        {%- if '<|channel>' in part -%}
+            {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
+        {%- else -%}
+            {%- set ns.result = ns.result + part -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{- ns.result | trim -}}
+{%- endmacro -%}
+
+{%- macro format_tool_response_block(tool_name, response) -%}
+    {{- '<|tool_response>' -}}
+    {%- if response is mapping -%}
+        {{- 'response:' + tool_name + '{' -}}
+        {%- for key, value in response | dictsort -%}
+            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+            {%- if not loop.last %},{% endif -%}
+        {%- endfor -%}
+        {{- '}' -}}
+    {%- else -%}
+        {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
+    {%- endif -%}
+    {{- '<tool_response|>' -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(prev_message_type=None) -%}
+{%- set loop_messages = messages -%}
+{{- bos_token -}}
+{#- Handle System/Tool Definitions Block -#}
+{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
+    {{- '<|turn>system\n' -}}
+
+    {#- Inject Thinking token at the very top of the FIRST system turn -#}
+    {%- if enable_thinking is defined and enable_thinking -%}
+        {{- '<|think|>\n' -}}
+        {%- set ns.prev_message_type = 'think' -%}
+    {%- endif -%}
+
+    {%- if messages[0]['role'] in ['system', 'developer'] -%}
+        {{- messages[0]['content'] | trim -}}
+        {%- set loop_messages = messages[1:] -%}
+    {%- endif -%}
+
+    {%- if tools -%}
+        {%- for tool in tools %}
+            {{- '<|tool>' -}}
+            {{- format_function_declaration(tool) | trim -}}
+            {{- '<tool|>' -}}
+        {%- endfor %}
+        {%- set ns.prev_message_type = 'tool' -%}
+    {%- endif -%}
+
+    {{- '<turn|>\n' -}}
+{%- endif %}
+
+{#- Pre-scan: find last user message index for reasoning guard -#}
+{%- set ns_turn = namespace(last_user_idx=-1) -%}
+{%- for i in range(loop_messages | length) -%}
+    {%- if loop_messages[i]['role'] == 'user' -%}
+        {%- set ns_turn.last_user_idx = i -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Loop through messages -#}
+{%- for message in loop_messages -%}
+    {%- if message['role'] != 'tool' -%}
+    {%- set ns.prev_message_type = None -%}
+    {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
+    {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
+    {%- set prev_nt = namespace(role=None, found=false) -%}
+    {%- if loop.index0 > 0 -%}
+        {%- for j in range(loop.index0 - 1, -1, -1) -%}
+            {%- if not prev_nt.found -%}
+                {%- if loop_messages[j]['role'] != 'tool' -%}
+                    {%- set prev_nt.role = loop_messages[j]['role'] -%}
+                    {%- set prev_nt.found = true -%}
+                {%- endif -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endif -%}
+    {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
+    {%- if not continue_same_model_turn -%}
+        {{- '<|turn>' + role + '\n' }}
+    {%- endif -%}
+
+    {#- Render reasoning/reasoning_content as thinking channel -#}
+    {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
+    {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
+        {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
+    {%- endif -%}
+
+            {%- if message['tool_calls'] -%}
+                {%- for tool_call in message['tool_calls'] -%}
+                    {%- set function = tool_call['function'] -%}
+                    {{- '<|tool_call>call:' + function['name'] + '{' -}}
+                    {%- if function['arguments'] is mapping -%}
+                        {%- set ns_args = namespace(found_first=false) -%}
+                        {%- for key, value in function['arguments'] | dictsort -%}
+                            {%- if ns_args.found_first %},{% endif -%}
+                            {%- set ns_args.found_first = true -%}
+                            {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
+                        {%- endfor -%}
+                    {%- elif function['arguments'] is string -%}
+                        {{- function['arguments'] -}}
+                    {%- endif -%}
+                    {{- '}<tool_call|>' -}}
+                {%- endfor -%}
+                {%- set ns.prev_message_type = 'tool_call' -%}
+            {%- endif -%}
+
+            {%- set ns_tr_out = namespace(flag=false) -%}
+            {%- if message.get('tool_responses') -%}
+                {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
+                {%- for tool_response in message['tool_responses'] -%}
+                    {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
+                    {%- set ns_tr_out.flag = true -%}
+                    {%- set ns.prev_message_type = 'tool_response' -%}
+                {%- endfor -%}
+            {%- elif message.get('tool_calls') -%}
+                {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages -#}
+                {%- set ns_tool_scan = namespace(stopped=false) -%}
+                {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
+                    {%- if ns_tool_scan.stopped -%}
+                    {%- elif loop_messages[k]['role'] != 'tool' -%}
+                        {%- set ns_tool_scan.stopped = true -%}
+                    {%- else -%}
+                        {%- set follow = loop_messages[k] -%}
+                        {#- Resolve tool_call_id to function name -#}
+                        {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
+                        {%- for tc in message['tool_calls'] -%}
+                            {%- if tc.get('id') == follow.get('tool_call_id') -%}
+                                {%- set ns_tname.name = tc['function']['name'] -%}
+                            {%- endif -%}
+                        {%- endfor -%}
+                        {#- Handle content as string or content-parts array -#}
+                        {%- set tool_body = follow.get('content') -%}
+                        {%- if tool_body is string -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- elif tool_body is sequence and tool_body is not string -%}
+                            {%- set ns_txt = namespace(s='') -%}
+                            {%- for part in tool_body -%}
+                                {%- if part.get('type') == 'text' -%}
+                                    {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('')) -%}
+                                {%- endif -%}
+                            {%- endfor -%}
+                            {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
+                        {%- else -%}
+                            {{- format_tool_response_block(ns_tname.name, tool_body) -}}
+                        {%- endif -%}
+                        {%- set ns_tr_out.flag = true -%}
+                        {%- set ns.prev_message_type = 'tool_response' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+            {%- if message['content'] is string -%}
+                {%- if role == 'model' -%}
+                    {{- strip_thinking(message['content']) -}}
+                {%- else -%}
+                    {{- message['content'] | trim -}}
+                {%- endif -%}
+            {%- elif message['content'] is sequence -%}
+                {%- for item in message['content'] -%}
+                    {%- if item['type'] == 'text' -%}
+                        {%- if role == 'model' -%}
+                            {{- strip_thinking(item['text']) -}}
+                        {%- else -%}
+                            {{- item['text'] | trim -}}
+                        {%- endif -%}
+                    {%- elif item['type'] == 'image' -%}
+                        {{- '<|image|>' -}}
+                        {%- set ns.prev_message_type = 'image' -%}
+                    {%- elif item['type'] == 'audio' -%}
+                        {{- '<|audio|>' -}}
+                        {%- set ns.prev_message_type = 'audio' -%}
+                    {%- elif item['type'] == 'video' -%}
+                        {{- '<|video|>' -}}
+                        {%- set ns.prev_message_type = 'video' -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+
+        {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
+            {{- '<|tool_response>' -}}
+        {%- elif not (ns_tr_out.flag and not message.get('content')) -%}
+            {{- '<turn|>\n' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endfor -%}
+
+{%- if add_generation_prompt -%}
+    {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
+        {{- '<|turn>model\n' -}}
+        {%- if not enable_thinking | default(false) -%}
+            {{- '<|channel>thought\n<channel|>' -}}
+        {%- endif -%}
+    {%- endif -%}
+{%- endif -%}
diff --git a/models/templates/llama-cpp-deepseek-r1.jinja b/models/templates/llama-cpp-deepseek-r1.jinja
index 0d18870870..151b2edd02 100644
--- a/models/templates/llama-cpp-deepseek-r1.jinja
+++ b/models/templates/llama-cpp-deepseek-r1.jinja
@@ -49,7 +49,7 @@ Example function tool call syntax:
             {%- endif -%}
             {%- set tool_name = tc['function']['name'] -%}
             {%- set tool_args = tc['function']['arguments'] -%}
-            {{- '<｜tool▁call▁begin｜>' + tc['type'] + '<｜tool▁sep｜>' + tool_name + '\n' + '```json' + '\n' + tool_args + '\n' + '```' + '<｜tool▁call▁end｜>' -}}
+            {{- '<｜tool▁call▁begin｜>' + tc['type'] + '<｜tool▁sep｜>' + tool_name + '\n' + '```json' + '\n' + tool_args | tojson + '\n' + '```' + '<｜tool▁call▁end｜>' -}}
         {%- endfor -%}
         {{- '<｜tool▁calls▁end｜><｜end▁of▁sentence｜>' -}}
     {%- endif -%}
diff --git a/models/templates/meetkai-functionary-medium-v3.1.jinja b/models/templates/meetkai-functionary-medium-v3.1.jinja
index 29d64a215a..5f74b72f33 100644
--- a/models/templates/meetkai-functionary-medium-v3.1.jinja
+++ b/models/templates/meetkai-functionary-medium-v3.1.jinja
@@ -42,9 +42,9 @@
         {%- if 'tool_calls' in message and message['tool_calls'] -%}
             {%- for tool_call in message['tool_calls'] -%}
                 {%- if tool_call["function"]["name"] == "python" -%}
-                    {{ '<|python_tag|>' + tool_call['function']['arguments'] }}
+                    {{ '<|python_tag|>' + tool_call['function']['arguments'] | tojson }}
                 {%- else -%}
-                    {{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] + '</function>' }}
+                    {{ '<function=' + tool_call['function']['name'] + '>' + tool_call['function']['arguments'] | tojson + '</function>' }}
                 {%- endif -%}
             {%- endfor -%}
             {{ '<|eom_id|>' }}
diff --git a/models/templates/unsloth-Apriel-1.5.jinja b/models/templates/unsloth-Apriel-1.5.jinja
index 29e582fbf6..1639b63901 100644
--- a/models/templates/unsloth-Apriel-1.5.jinja
+++ b/models/templates/unsloth-Apriel-1.5.jinja
@@ -86,22 +86,22 @@ Prior to generating the function calls, you should generate the reasoning for wh
             {%- set add_tool_id = false -%}
         {%- endif -%}
         {{- '<|assistant|>\n' -}}
-        {%- if message['content'] is not none and message['content']|length > 0 -%}
+        {%- if message['content'] is defined and message['content'] is not none and message['content']|length > 0 -%}
             {%- if message['content'] is not string and message['content'][0]['text'] is not none %}
                 {{- message['content'][0]['text'] }}
             {%- else %}
                 {{- message['content'] -}}
             {%- endif -%}
-        {%- elif message['chosen'] is not none and message['chosen']|length > 0 -%}
+        {%- elif message['chosen'] is defined and message['chosen'] is not none and message['chosen']|length > 0 -%}
             {{- message['chosen'][0] -}}
         {%- endif -%}
         {%- if add_thoughts and 'thought' in message and message['thought'] is not none -%}
             {{- '<thinking>' + message['thought'] + '</thinking>' -}}
         {%- endif -%}
-        {%- if message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
+        {%- if message['tool_calls'] is defined and message['tool_calls'] is not none and message['tool_calls']|length > 0 -%}
             {{- '\n<tool_calls>[' -}}
             {%- for tool_call in message["tool_calls"] -%}
-                {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|string -}}
+                {{- '{"name": "' + tool_call['function']['name'] + '", "arguments": ' + tool_call['function']['arguments']|tojson -}}
                 {%- if add_tool_id == true -%}
                     {{- ', "id": "' + tool_call['id'] + '"' -}}
                 {%- endif -%}
diff --git a/scripts/server-test-model.py b/scripts/server-test-model.py
new file mode 100644
index 0000000000..9049d80279
--- /dev/null
+++ b/scripts/server-test-model.py
@@ -0,0 +1,202 @@
+import argparse
+import json
+import requests
+import logging
+import sys
+
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = ""   # ← no newline
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+    payload = {
+        "messages": messages,
+        "stream": stream,
+        "max_tokens": 5000,
+    }
+    if tools:
+        payload["tools"] = tools
+    if tool_choice:
+        payload["tool_choice"] = tool_choice
+
+    try:
+        response = requests.post(url, json=payload, stream=stream)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        if e.response is not None:
+            logger.info(f"Response error: {e} for {e.response.content}\n")
+        else:
+            logger.info(f"Error connecting to server: {e}\n")
+        return None
+
+    full_content = ""
+    reasoning_content = ""
+    tool_calls = []
+
+    if stream:
+        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+        for line in response.iter_lines():
+            if line:
+                decoded_line = line.decode("utf-8")
+                if decoded_line.startswith("data: "):
+                    data_str = decoded_line[6:]
+                    if data_str == "[DONE]":
+                        break
+                    try:
+                        data = json.loads(data_str)
+                        if "choices" in data and len(data["choices"]) > 0:
+                            delta = data["choices"][0].get("delta", {})
+
+                            # Content
+                            content_chunk = delta.get("content", "")
+                            if content_chunk:
+                                full_content += content_chunk
+                                logger.info(content_chunk)
+
+                            # Reasoning
+                            reasoning_chunk = delta.get("reasoning_content", "")
+                            if reasoning_chunk:
+                                reasoning_content += reasoning_chunk
+                                logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+                            # Tool calls
+                            if "tool_calls" in delta:
+                                for tc in delta["tool_calls"]:
+                                    index = tc.get("index")
+                                    if index is not None:
+                                        while len(tool_calls) <= index:
+                                            # Using "function" as type default but could be flexible
+                                            tool_calls.append(
+                                                {
+                                                    "id": "",
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": "",
+                                                        "arguments": "",
+                                                    },
+                                                }
+                                            )
+
+                                        if "id" in tc:
+                                            tool_calls[index]["id"] += tc["id"]
+                                        if "function" in tc:
+                                            if "name" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "name"
+                                                ] += tc["function"]["name"]
+                                            if "arguments" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "arguments"
+                                                ] += tc["function"]["arguments"]
+
+                    except json.JSONDecodeError:
+                        logger.info(f"Failed to decode JSON: {data_str}\n")
+        logger.info("\n--- End of Stream ---\n")
+    else:
+        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+        data = response.json()
+        if "choices" in data and len(data["choices"]) > 0:
+            message = data["choices"][0].get("message", {})
+            full_content = message.get("content", "")
+            reasoning_content = message.get("reasoning_content", "")
+            tool_calls = message.get("tool_calls", [])
+            logger.info(full_content)
+        logger.info("--- End of Response ---\n")
+
+    return {
+        "content": full_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls,
+    }
+
+
+def test_chat(url, stream):
+    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+    messages = [{"role": "user", "content": "What is the capital of France?"}]
+    result = run_query(url, messages, stream=stream)
+
+    if result:
+        if result["content"]:
+            logger.info("PASS: Output received.\n")
+        else:
+            logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
+
+        if result.get("reasoning_content"):
+            logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
+        else:
+            logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
+    else:
+        logger.info("FAIL: No result.\n")
+
+
+def test_tool_call(url, stream):
+    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the weather in London? Please use the get_weather tool.",
+        }
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+
+    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+    if result:
+        tcs = result.get("tool_calls")
+        if tcs and len(tcs) > 0:
+            logger.info("PASS: Tool calls detected.")
+            for tc in tcs:
+                func = tc.get("function", {})
+                logger.info(f"  Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+        else:
+            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+        if result.get("reasoning_content"):
+            logger.info(
+                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+            )
+    else:
+        logger.info("FAIL: Query failed.\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Test llama-server functionality.")
+    parser.add_argument("--host", default="localhost", help="Server host")
+    parser.add_argument("--port", default=8080, type=int, help="Server port")
+    args = parser.parse_args()
+
+    base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
+    logger.info(f"Testing server at {base_url}\n")
+
+    # Non-streaming tests
+    test_chat(base_url, stream=False)
+    test_tool_call(base_url, stream=False)
+
+    # Streaming tests
+    test_chat(base_url, stream=True)
+    test_tool_call(base_url, stream=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp
index 5d3a864e2b..3ca663e373 100644
--- a/src/llama-grammar.cpp
+++ b/src/llama-grammar.cpp
@@ -617,7 +617,7 @@ const char* llama_grammar_parser::parse_sequence(
                 throw std::runtime_error(std::string("expecting an int at ") + pos);
             }
             const char* int_end = parse_int(pos);
-            uint64_t min_times = std::stoul(std::string(pos, int_end - pos));
+            uint64_t min_times = std::stoull(std::string(pos, int_end - pos));
             pos = parse_space(int_end, is_nested);
 
             uint64_t max_times = UINT64_MAX; // default: no max limit
@@ -631,7 +631,7 @@ const char* llama_grammar_parser::parse_sequence(
 
                 if (is_digit_char(*pos)) {
                     const char* int_end = parse_int(pos);
-                    max_times = std::stoul(std::string(pos, int_end - pos));
+                    max_times = std::stoull(std::string(pos, int_end - pos));
                     pos = parse_space(int_end, is_nested);
                 }
 
@@ -1434,7 +1434,9 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, const struct llam
     }
 
     llama_grammar_accept_token(grammar, token, piece);
-    smpl->t_sample_us += ggml_time_us() - t_start_sample_us;
+    if (smpl) {
+        smpl->t_sample_us += ggml_time_us() - t_start_sample_us;
+    }
 }
 
 void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
diff --git a/src/llama.cpp b/src/llama.cpp
index c4d8e79f94..b8bfe8f07c 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8807,7 +8807,7 @@ struct llama_grammar * llama_grammar_copy(const struct llama_grammar * grammar)
     return llama_grammar_clone_impl(*grammar);
 }
 
-void llama_grammar_sample(
+void llama_grammar_apply(
       const struct llama_grammar * grammar,
       const struct llama_context * ctx,
           llama_token_data_array * candidates) {
@@ -8818,7 +8818,7 @@ void llama_sample_grammar(
             struct llama_context * ctx,
           llama_token_data_array * candidates,
       const struct llama_grammar * grammar) {
-    llama_grammar_sample(grammar, ctx, candidates);
+    llama_grammar_apply(grammar, ctx, candidates);
 }
 
 void llama_grammar_accept_token(
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index fa1d848680..878d4a3403 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -182,12 +182,12 @@ if (NOT WIN32)
     # llama_target_and_test(test-double-float.cpp) # SLOW
 endif()
 
-llama_build_and_test(test-chat-parser.cpp)
-llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
-#llama_build_and_test(test-chat-template.cpp)
+#llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
 llama_build_and_test(test-jinja.cpp)
 llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
-llama_build_and_test(test-json-partial.cpp)
+llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+llama_build_and_test(test-chat-template.cpp )
+llama_build_and_test(test-json-partial.cpp )
 #llama_build_and_test(test-log.cpp)
 llama_build_and_test(
     test-peg-parser.cpp
@@ -196,6 +196,7 @@ llama_build_and_test(
     peg-parser/test-gbnf-generation.cpp
     peg-parser/test-json-parser.cpp
     peg-parser/test-json-serialization.cpp
+    peg-parser/test-python-dict-parser.cpp
     peg-parser/test-unicode.cpp
     peg-parser/tests.h
 )
@@ -211,3 +212,5 @@ llama_target_and_test(test-autorelease.cpp        LABEL "model")
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
 add_executable(${TEST_TARGET} test-c.c)
 target_link_libraries(${TEST_TARGET} PRIVATE llama)
+
+
diff --git a/tests/peg-parser/test-basic.cpp b/tests/peg-parser/test-basic.cpp
index 1bda6f2e69..b6af61491d 100644
--- a/tests/peg-parser/test-basic.cpp
+++ b/tests/peg-parser/test-basic.cpp
@@ -1,3 +1,4 @@
+#include "peg-parser.h"
 #include "tests.h"
 
 void test_basic(testing & t) {
@@ -119,7 +120,7 @@ void test_basic(testing & t) {
                 return p.literal("hello") + p.optional(p.literal(" world"));
             });
 
-            auto ctx    = common_peg_parse_context("hello", false);
+            auto ctx    = common_peg_parse_context("hello");
             auto result = parser.parse(ctx);
             t.assert_equal("optional_absent", true, result.success());
             t.assert_equal("optional_absent_end", 5u, result.end);
@@ -131,7 +132,7 @@ void test_basic(testing & t) {
                 return p.literal("hello") + p.optional(p.literal(" world"));
             });
 
-            auto ctx    = common_peg_parse_context("hello ", true);
+            auto ctx    = common_peg_parse_context("hello ", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("partial_match_need_more", true, result.need_more_input());
         });
@@ -214,7 +215,7 @@ void test_basic(testing & t) {
         t.test("sequence_partial_match_1", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
 
-            auto ctx    = common_peg_parse_context("<thi", true);
+            auto ctx    = common_peg_parse_context("<thi", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("sequence_partial_match_1", true, result.need_more_input());
         });
@@ -223,7 +224,7 @@ void test_basic(testing & t) {
         t.test("sequence_partial_match_2", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("begin") + p.literal("end"); });
 
-            auto ctx    = common_peg_parse_context("begin", true);
+            auto ctx    = common_peg_parse_context("begin", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("sequence_partial_match_2", true, result.need_more_input());
         });
@@ -232,7 +233,7 @@ void test_basic(testing & t) {
         t.test("sequence_partial_match_3", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
 
-            auto ctx    = common_peg_parse_context("<think></", true);
+            auto ctx    = common_peg_parse_context("<think></", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("sequence_partial_match_3", true, result.need_more_input());
         });
@@ -241,7 +242,7 @@ void test_basic(testing & t) {
         t.test("sequence_full_match", [&](testing & t) {
             auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("hello") + p.literal("world"); });
 
-            auto ctx    = common_peg_parse_context("helloworld", false);
+            auto ctx    = common_peg_parse_context("helloworld");
             auto result = common_chat_combinator_parser.parse(ctx);
             t.assert_equal("sequence_full_match", true, result.success());
         });
@@ -250,7 +251,7 @@ void test_basic(testing & t) {
         t.test("sequence_no_match", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
 
-            auto ctx    = common_peg_parse_context("<think>I am common_chat_combinator_parser", true);
+            auto ctx    = common_peg_parse_context("<think>I am common_chat_combinator_parser", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("sequence_no_match", true, result.fail());
         });
@@ -259,7 +260,7 @@ void test_basic(testing & t) {
         t.test("choices_partial_match_1", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("option1") | p.literal("option2"); });
 
-            auto ctx    = common_peg_parse_context("opt", true);
+            auto ctx    = common_peg_parse_context("opt", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("choices_partial_match_1", true, result.need_more_input());
         });
@@ -269,7 +270,7 @@ void test_basic(testing & t) {
             auto parser =
                 build_peg_parser([](common_peg_parser_builder & p) { return p.literal("choice_a") | p.literal("choice_b"); });
 
-            auto ctx    = common_peg_parse_context("choice", true);
+            auto ctx    = common_peg_parse_context("choice", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("choices_partial_match_2", true, result.need_more_input());
         });
@@ -278,7 +279,7 @@ void test_basic(testing & t) {
         t.test("choices_full_match_1", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("first") | p.literal("second"); });
 
-            auto ctx    = common_peg_parse_context("first", false);
+            auto ctx    = common_peg_parse_context("first");
             auto result = parser.parse(ctx);
             t.assert_equal("choices_full_match_1", true, result.success());
         });
@@ -287,7 +288,7 @@ void test_basic(testing & t) {
         t.test("choices_full_match_2", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("alpha") | p.literal("beta"); });
 
-            auto ctx    = common_peg_parse_context("beta", false);
+            auto ctx    = common_peg_parse_context("beta");
             auto result = parser.parse(ctx);
             t.assert_equal("choices_full_match_2", true, result.success());
         });
@@ -296,7 +297,7 @@ void test_basic(testing & t) {
         t.test("choices_no_match", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("good") | p.literal("better"); });
 
-            auto ctx    = common_peg_parse_context("best", false);
+            auto ctx    = common_peg_parse_context("best");
             auto result = parser.parse(ctx);
             t.assert_equal("choices_no_match", true, result.fail());
         });
@@ -305,7 +306,7 @@ void test_basic(testing & t) {
         t.test("zero_or_more_partial_match_1", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("ab")); });
 
-            auto ctx    = common_peg_parse_context("a", true);
+            auto ctx    = common_peg_parse_context("a", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("zero_or_more_partial_match_1", true, result.need_more_input());
         });
@@ -314,7 +315,7 @@ void test_basic(testing & t) {
         t.test("zero_or_more_partial_match_2", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("xy")); });
 
-            auto ctx    = common_peg_parse_context("xyx", true);
+            auto ctx    = common_peg_parse_context("xyx", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("zero_or_more_partial_match_2", true, result.need_more_input());
         });
@@ -323,7 +324,7 @@ void test_basic(testing & t) {
         t.test("zero_or_more_full_match", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("test")); });
 
-            auto ctx    = common_peg_parse_context("test", false);
+            auto ctx    = common_peg_parse_context("test");
             auto result = parser.parse(ctx);
             t.assert_equal("zero_or_more_full_match", true, result.success());
         });
@@ -332,7 +333,7 @@ void test_basic(testing & t) {
         t.test("one_or_more_partial_match_1", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("repeat")); });
 
-            auto ctx    = common_peg_parse_context("rep", true);
+            auto ctx    = common_peg_parse_context("rep", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("one_or_more_partial_match_1", true, result.need_more_input());
         });
@@ -341,7 +342,7 @@ void test_basic(testing & t) {
         t.test("one_or_more_partial_match_2", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("ab")); });
 
-            auto ctx    = common_peg_parse_context("aba", true);
+            auto ctx    = common_peg_parse_context("aba", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto result = parser.parse(ctx);
             t.assert_equal("one_or_more_partial_match_2", true, result.need_more_input());
         });
@@ -350,7 +351,7 @@ void test_basic(testing & t) {
         t.test("one_or_more_full_match", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("single")); });
 
-            auto ctx    = common_peg_parse_context("single", false);
+            auto ctx    = common_peg_parse_context("single");
             auto result = parser.parse(ctx);
             t.assert_equal("one_or_more_full_match", true, result.success());
         });
@@ -359,7 +360,7 @@ void test_basic(testing & t) {
         t.test("one_or_more_no_match", [&](testing & t) {
             auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("()")); });
 
-            auto ctx    = common_peg_parse_context("success", false);
+            auto ctx    = common_peg_parse_context("success");
             auto result = parser.parse(ctx);
             t.assert_equal("one_or_more_no_match", true, result.fail());
         });
@@ -375,7 +376,7 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("1", false);
+            common_peg_parse_context ctx("1");
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_success", true, result.success());
@@ -389,7 +390,7 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("[1]", false);
+            common_peg_parse_context ctx("[1]");
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_success", true, result.success());
@@ -403,7 +404,7 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("[[2]]", false);
+            common_peg_parse_context ctx("[[2]]");
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_success", true, result.success());
@@ -417,7 +418,7 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("[[[3]]]", false);
+            common_peg_parse_context ctx("[[[3]]]");
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_success", true, result.success());
@@ -431,7 +432,7 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("[[", true);
+            common_peg_parse_context ctx("[[", COMMON_PEG_PARSE_FLAG_LENIENT);
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_need_more_input", true, result.need_more_input());
@@ -445,10 +446,26 @@ void test_basic(testing & t) {
                 return p.rule("value", p.ref("number") | p.ref("list"));
             });
 
-            common_peg_parse_context ctx("[a]", false);
+            common_peg_parse_context ctx("[a]");
             auto           result = value_parser.parse(ctx);
 
             t.assert_equal("result_is_fail", true, result.fail());
         });
+
+        // Test markers
+        t.test("marker", [](testing &t) {
+            auto bracket_parser = build_peg_parser([](common_peg_parser_builder & p) {
+                return p.marker();
+            });
+
+            common_peg_parse_context ctx_square("[marker]");
+            common_peg_parse_context ctx_sharp("<marker>");
+
+            auto result_square = bracket_parser.parse(ctx_square);
+            auto result_sharp = bracket_parser.parse(ctx_sharp);
+
+            t.assert_true("result_square_is_success", result_square.success());
+            t.assert_true("result_sharp_is_success", result_sharp.success());
+        });
     });
 }
diff --git a/tests/peg-parser/test-gbnf-generation.cpp b/tests/peg-parser/test-gbnf-generation.cpp
index 68857a5e88..fe4bbbdd16 100644
--- a/tests/peg-parser/test-gbnf-generation.cpp
+++ b/tests/peg-parser/test-gbnf-generation.cpp
@@ -213,6 +213,126 @@ void test_gbnf_generation(testing &t) {
         )""", gbnf);
     });
 
+    t.test("tagged choice inside sequence gets parenthesized", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("a") + p.tag("t", p.literal("b") | p.literal("c"));
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "a" ("b" | "c")
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("tagged sequence inside choice gets parenthesized", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("t", p.literal("a") + p.literal("b")) | p.literal("c");
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "a" "b" | "c"
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("atomic choice inside repetition gets parenthesized", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.one_or_more(p.atomic(p.literal("a") | p.literal("b")));
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= ("a" | "b")+
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("silent parser emits nothing in gbnf", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("hello") + p.gbnf(p.literal("world"), "");
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "hello"
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("silent choice inside sequence emits nothing", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("a") + p.gbnf(p.literal("b") | p.literal("c"), "") + p.literal("d");
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "a" "d"
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("silent wrapped in tag emits nothing", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("a") + p.tag("t", p.gbnf(p.literal("b"), ""));
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "a"
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("gbnf parser emits custom grammar", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("a") + p.gbnf(p.literal("b"), "[a-z]+");
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "a" [a-z]+
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
+    t.test("nested transparent wrappers get parenthesized", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.literal("x") + p.tag("outer", p.atomic(p.literal("a") | p.literal("b")));
+        });
+
+        auto gbnf = build_grammar([&](const common_grammar_builder & builder) {
+            parser.build_grammar(builder);
+        });
+
+        assert_gbnf_equal(t, R"""(
+            root ::= "x" ("a" | "b")
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+        )""", gbnf);
+    });
+
     t.test("emit only trigger rules (and references)", [](testing &t) {
         auto parser = build_peg_parser([](common_peg_parser_builder & p) {
             auto rule1 = p.rule("rule-1", p.literal("a") + p.ref("rule-2"));
diff --git a/tests/peg-parser/test-json-parser.cpp b/tests/peg-parser/test-json-parser.cpp
index 48351cd66f..5dd00115ce 100644
--- a/tests/peg-parser/test-json-parser.cpp
+++ b/tests/peg-parser/test-json-parser.cpp
@@ -46,7 +46,7 @@ void test_json_parser(testing &t) {
         auto json = build_peg_parser([](common_peg_parser_builder & p) { return p.json(); });
 
         std::string    input = R"({"name": "test", "value": )";
-        common_peg_parse_context ctx(input, true);
+        common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
 
         auto result = json.parse(ctx);
 
@@ -58,7 +58,7 @@ void test_json_parser(testing &t) {
         auto json = build_peg_parser([](common_peg_parser_builder & p) { return p.json(); });
 
         std::string    input = R"([1, 2, 3, )";
-        common_peg_parse_context ctx(input, true);
+        common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
 
         auto result = json.parse(ctx);
 
@@ -70,7 +70,7 @@ void test_json_parser(testing &t) {
         auto json = build_peg_parser([](common_peg_parser_builder & p) { return p.json(); });
 
         std::string    input = R"({"data": {"nested": )";
-        common_peg_parse_context ctx(input, true);
+        common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
 
         auto result = json.parse(ctx);
 
@@ -84,7 +84,7 @@ void test_json_parser(testing &t) {
 
         t.test("success", [&](testing &t) {
             std::string input = R"("name": "bob")";
-            common_peg_parse_context ctx(input, false);
+            common_peg_parse_context ctx(input);
 
             auto result = parser.parse(ctx);
             t.assert_true("success", result.success());
@@ -92,7 +92,7 @@ void test_json_parser(testing &t) {
 
         t.test("partial", [&](testing &t) {
             std::string input = R"("name": "bo)";
-            common_peg_parse_context ctx(input, true);
+            common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
 
             auto result = parser.parse(ctx);
             t.assert_true("need more input", result.need_more_input());
@@ -100,7 +100,7 @@ void test_json_parser(testing &t) {
 
         t.test("failed", [&](testing &t) {
             std::string input = R"([])";
-            common_peg_parse_context ctx(input, false);
+            common_peg_parse_context ctx(input);
 
             auto result = parser.parse(ctx);
             t.assert_true("fail", result.fail());
diff --git a/tests/peg-parser/test-python-dict-parser.cpp b/tests/peg-parser/test-python-dict-parser.cpp
new file mode 100644
index 0000000000..1a549106b8
--- /dev/null
+++ b/tests/peg-parser/test-python-dict-parser.cpp
@@ -0,0 +1,318 @@
+#include "tests.h"
+
+void test_python_dict_parser(testing &t) {
+    // Test parsing a simple Python dict object with single quotes
+    t.test("simple Python dict object parsing", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test', 'value': 42, 'flag': True}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing a Python array with mixed types
+    t.test("Python array with mixed types", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "[1, 'hello', True, None, 3.14]";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing nested Python dict with objects and arrays
+    t.test("nested Python dict with objects and arrays", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string input =
+            "{'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}], 'count': 2, 'metadata': {'version': '1.0', 'tags': ['admin', 'user']}}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with escaped single quotes
+    t.test("Python dict with escaped single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'message': 'It\\'s working!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test parsing Python dict with double quotes inside single quotes
+    t.test("Python dict with double quotes inside single quotes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'quote': 'He said \"Hello\"'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test the example from the requirements
+    t.test("complex Python dict example from requirements", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test need_more_input() parsing - incomplete object
+    t.test("need_more_input() parsing - incomplete object", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test', 'value': ";
+        common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test need_more_input() parsing - incomplete single-quoted string
+    t.test("need_more_input() parsing - incomplete single-quoted string", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'name': 'test";
+        common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_need_more_input", true, result.need_more_input());
+    });
+
+    // Test unicode in Python dict strings
+    t.test("unicode in Python dict strings", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'message': 'Hello, 世界!'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python dict with unicode escapes
+    t.test("Python dict with unicode escapes", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{'unicode': 'Hello\\u0041'}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test that Python parser accepts double-quoted strings too
+    t.test("Python parser accepts double-quoted strings", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{\"name\": \"test\"}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python parser with mixed quote styles
+    t.test("Python parser with mixed quote styles", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        std::string    input = "{\"name\": 'test', 'value': \"hello\"}";
+        common_peg_parse_context ctx(input);
+
+        auto result = parser.parse(ctx);
+
+        t.assert_equal("result_is_success", true, result.success());
+        t.assert_equal("result_end", input.size(), result.end);
+    });
+
+    // Test Python True/False/None
+    t.test("Python True/False/None", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.python_value(); });
+
+        t.test("True", [&](testing &t) {
+            std::string input = "True";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("False", [&](testing &t) {
+            std::string input = "False";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("None", [&](testing &t) {
+            std::string input = "None";
+            common_peg_parse_context ctx(input);
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("rejects JSON-style true/false/null", [&](testing &t) {
+            for (const auto & kw : {"true", "false", "null"}) {
+                std::string input = kw;
+                common_peg_parse_context ctx(input);
+                auto result = parser.parse(ctx);
+                t.assert_true(std::string("rejects ") + kw, result.fail());
+            }
+        });
+    });
+
+    // Test single-quoted string content parser directly
+    t.test("single-quoted string content parser", [](testing &t) {
+        auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+            return p.sequence({ p.literal("'"), p.string_content('\''), p.literal("'"), p.space() });
+        });
+
+        t.test("simple string", [&](testing &t) {
+            std::string input = "'hello'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with escaped single quote", [&](testing &t) {
+            std::string input = "'it\\'s'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("string with double quotes", [&](testing &t) {
+            std::string input = "'say \"hello\"'";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("incomplete string", [&](testing &t) {
+            std::string input = "'hello";
+            common_peg_parse_context ctx(input, COMMON_PEG_PARSE_FLAG_LENIENT);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("need_more_input", result.need_more_input());
+        });
+    });
+
+    // Test json() with pre-registered flexible json-string rule (python dict support)
+    t.test("json() parser with flexible json-string rule", [](testing &t) {
+        t.test("json() rejects single quotes by default", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("fail", result.fail());
+        });
+
+        t.test("json() accepts single quotes with pre-registered flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                // Pre-register json-string rule with both quote styles
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{'name': 'test'}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() still accepts double quotes with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{\"name\": \"test\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("json() accepts mixed quote styles with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{\"name\": 'test', 'value': \"hello\"}";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+
+        t.test("complex nested structure with flexible json-string rule", [&](testing &t) {
+            auto parser = build_peg_parser([](common_peg_parser_builder & p) {
+                p.rule("json-string", [&]() {
+                    return p.choice({ p.double_quoted_string(), p.single_quoted_string() });
+                });
+                return p.json();
+            });
+
+            std::string input = "{ 'obj' : { 'something': 1, 'other \"something\"' : 'foo\\'s bar' } }";
+            common_peg_parse_context ctx(input);
+
+            auto result = parser.parse(ctx);
+            t.assert_true("success", result.success());
+            t.assert_equal("end", input.size(), result.end);
+        });
+    });
+}
diff --git a/tests/peg-parser/test-unicode.cpp b/tests/peg-parser/test-unicode.cpp
index 19d9b9e41c..24663d7017 100644
--- a/tests/peg-parser/test-unicode.cpp
+++ b/tests/peg-parser/test-unicode.cpp
@@ -58,7 +58,7 @@ void test_unicode(testing &t) {
             std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
             t.test(test_name, [&](testing &t) {
-                common_peg_parse_context ctx(tc.input, true);
+                common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                 auto result = parser.parse(ctx);
 
                 // Assert result type matches
@@ -101,7 +101,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, true);
+                    common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                     auto result = parser.parse(ctx);
 
                     // Assert result type matches
@@ -142,7 +142,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, true);
+                    common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                     auto result = parser.parse(ctx);
 
                     // Assert result type matches
@@ -187,7 +187,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, true);
+                    common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                     auto result = parser.parse(ctx);
 
                     // Assert result type matches
@@ -225,7 +225,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, false);
+                    common_peg_parse_context ctx(tc.input);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -259,7 +259,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, true);
+                    common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -293,7 +293,7 @@ void test_unicode(testing &t) {
                 std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
 
                 t.test(test_name, [&](testing &t) {
-                    common_peg_parse_context ctx(tc.input, false);
+                    common_peg_parse_context ctx(tc.input);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -327,10 +327,10 @@ void test_unicode(testing &t) {
 
                 t.test(test_name, [&](testing &t) {
                     auto parser = build_peg_parser([](common_peg_parser_builder& p) {
-                        return p.sequence({p.json_string_content(), p.literal("\"")});
+                        return p.sequence({p.string_content('"'), p.literal("\"")});
                     });
 
-                    common_peg_parse_context ctx(tc.input, false);
+                    common_peg_parse_context ctx(tc.input);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -364,10 +364,10 @@ void test_unicode(testing &t) {
 
                 t.test(test_name, [&](testing &t) {
                     auto parser = build_peg_parser([](common_peg_parser_builder& p) {
-                        return p.json_string_content();
+                        return p.string_content('"');
                     });
 
-                    common_peg_parse_context ctx(tc.input, true);
+                    common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -390,9 +390,6 @@ void test_unicode(testing &t) {
 
                 // Invalid continuation byte
                 {std::string("\xC3\x28"), "", COMMON_PEG_PARSE_RESULT_FAIL},
-
-                // Overlong encoding (security issue)
-                {std::string("\xC0\x80"), "", COMMON_PEG_PARSE_RESULT_FAIL},
             };
 
             for (size_t i = 0; i < test_cases.size(); i++) {
@@ -401,10 +398,10 @@ void test_unicode(testing &t) {
 
                 t.test(test_name, [&](testing &t) {
                     auto parser = build_peg_parser([](common_peg_parser_builder& p) {
-                        return p.json_string_content();
+                        return p.string_content('"');
                     });
 
-                    common_peg_parse_context ctx(tc.input, false);
+                    common_peg_parse_context ctx(tc.input);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
@@ -430,10 +427,10 @@ void test_unicode(testing &t) {
 
                 t.test(test_name, [&](testing &t) {
                     auto parser = build_peg_parser([](common_peg_parser_builder& p) {
-                        return p.sequence({p.json_string_content(), p.literal("\"")});
+                        return p.sequence({p.string_content('"'), p.literal("\"")});
                     });
 
-                    common_peg_parse_context ctx(tc.input, false);
+                    common_peg_parse_context ctx(tc.input);
                     auto result = parser.parse(ctx);
 
                     assert_result_equal(t, tc.expected_result, result.type);
diff --git a/tests/peg-parser/tests.h b/tests/peg-parser/tests.h
index 4d3f4e9eaf..debd4286c5 100644
--- a/tests/peg-parser/tests.h
+++ b/tests/peg-parser/tests.h
@@ -22,3 +22,4 @@ void test_json_parser(testing &t);
 void test_gbnf_generation(testing &t);
 void test_unicode(testing &t);
 void test_json_serialization(testing &t);
+void test_python_dict_parser(testing &t);
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp
new file mode 100644
index 0000000000..bb23b7f2aa
--- /dev/null
+++ b/tests/test-chat-auto-parser.cpp
@@ -0,0 +1,1969 @@
+#include "chat-auto-parser-helpers.h"
+#include "chat-auto-parser.h"
+#include "chat-peg-parser.h"
+#include "chat.h"
+#include "peg-parser.h"
+#include "testing.h"
+
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+using namespace autoparser;
+
+static void test_calculate_diff_split_basic(testing & t);
+static void test_calculate_diff_split_identical(testing & t);
+static void test_calculate_diff_split_common_prefix(testing & t);
+static void test_calculate_diff_split_common_suffix(testing & t);
+static void test_calculate_diff_split_common_both(testing & t);
+static void test_calculate_diff_split_empty_cases(testing & t);
+static void test_calculate_diff_split_no_common(testing & t);
+static void test_calculate_diff_split_single_char(testing & t);
+static void test_calculate_diff_split_overlaps(testing & t);
+static void test_calculate_diff_split_tag_boundaries(testing & t);
+static void test_calculate_diff_split_generation_prompt(testing & t);
+static void test_calculate_diff_split(testing & t);
+
+static void test_until_common_prefix_basic(testing & t);
+static void test_until_common_prefix(testing & t);
+
+static void test_after_common_suffix_basic(testing & t);
+static void test_after_common_suffix(testing & t);
+
+static void test_analyze_tool_call_pure_json(testing & t);
+static void test_analyze_tool_call_function_name_markers(testing & t);
+static void test_analyze_tool_call_full_markers(testing & t);
+static void test_analyze_tool_call_edge_cases(testing & t);
+
+static void test_compare_variants_basic(testing & t);
+static void test_compare_variants_messages_modifier(testing & t);
+static void test_compare_variants_tools_modifier(testing & t);
+static void test_compare_variants_both_modifiers(testing & t);
+static void test_compare_variants_template_failure(testing & t);
+static void test_compare_variants_identity(testing & t);
+static void test_compare_variants(testing & t);
+
+// Seed-OSS template tool calling analysis tests
+static void test_seed_oss_tool_analysis(testing & t);
+static void test_seed_oss_tool_presence(testing & t);
+static void test_seed_oss_call_count(testing & t);
+static void test_seed_oss_function_names(testing & t);
+static void test_seed_oss_argument_count(testing & t);
+static void test_seed_oss_args_presence(testing & t);
+static void test_seed_oss_tool_with_reasoning(testing & t);
+
+// Nemotron template analysis tests
+static void test_nemotron_analysis(testing & t);
+static void test_nemotron_reasoning_detection(testing & t);
+static void test_nemotron_tool_format(testing & t);
+
+// CohereForAI template analysis tests
+static void test_cohere_reasoning_detection(testing & t);
+static void test_cohere_analysis(testing & t);
+
+// SmolLM3 template analysis tests
+static void test_smollm3_analysis(testing & t);
+
+// Marker separation
+static void test_marker_separation(testing & t);
+
+// standard_json_tools format tests
+static void test_standard_json_tools_formats(testing & t);
+static void test_standard_json_tools_openai(testing & t);
+static void test_standard_json_tools_cohere(testing & t);
+static void test_standard_json_tools_function_key(testing & t);
+
+// normalize_quotes_to_json tests
+static void test_normalize_quotes_to_json(testing & t);
+static void test_normalize_quotes_with_embedded_quotes(testing & t);
+
+// TAG_WITH_TAGGED argument parsing tests
+static void test_tagged_args_with_embedded_quotes(testing & t);
+
+int main(int argc, char * argv[]) {
+    testing t(std::cout);
+    t.verbose = true;
+
+    // usage: test-chat-auto-parser-helpers [filter_regex]
+
+    if (argc > 1) {
+        t.set_filter(argv[1]);
+    }
+
+    t.test("diff_split", test_calculate_diff_split);
+    t.test("common_prefix", test_until_common_prefix);
+    t.test("common_suffix", test_after_common_suffix);
+    t.test("compare_variants", test_compare_variants);
+    t.test("segments", test_marker_separation);
+    t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
+    t.test("cohere", test_cohere_analysis);
+    t.test("nemotron", test_nemotron_analysis);
+    t.test("smollm3", test_smollm3_analysis);
+    t.test("standard_json_tools", test_standard_json_tools_formats);
+    t.test("normalize_quotes_to_json", test_normalize_quotes_to_json);
+    t.test("tagged_args_embedded_quotes", test_tagged_args_with_embedded_quotes);
+
+    return t.summary();
+}
+
+static void test_marker_separation(testing & t) {
+    auto single_square_marker = segmentize_markers("pre_marker[marker]post_marker");
+    auto single_diag_marker = segmentize_markers("pre_marker<marker>post_marker");
+    auto paired_markers = segmentize_markers("<hello>world</hello>");
+    auto double_different_markers = segmentize_markers("<hello>[hello]<world>[world]");
+    auto in_between = segmentize_markers("im<blue>daba<dee>da[hey]");
+
+    t.test("single_square_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_square_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_square_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_square_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_square_marker[0].value);
+        t.assert_equal("second is '[marker]'", "[marker]", single_square_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_square_marker[2].value);
+    });
+
+    t.test("single_diagonal_marker", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, single_diag_marker[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, single_diag_marker[1].type);
+        t.assert_equal("last is text", segment_type::TEXT, single_diag_marker[2].type);
+
+        t.assert_equal("first is 'pre_marker'", "pre_marker", single_diag_marker[0].value);
+        t.assert_equal("second is '<marker>'", "<marker>", single_diag_marker[1].value);
+        t.assert_equal("last is 'post_marker'", "post_marker", single_diag_marker[2].value);
+    });
+
+    t.test("paired_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, paired_markers[0].type);
+        t.assert_equal("second is text", segment_type::TEXT, paired_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, paired_markers[2].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", paired_markers[0].value);
+        t.assert_equal("second is 'world'", "world", paired_markers[1].value);
+        t.assert_equal("third is '</hello>'", "</hello>", paired_markers[2].value);
+    });
+
+    t.test("double_different_markers", [&] (testing & t) {
+        t.assert_equal("first is marker", segment_type::MARKER, double_different_markers[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, double_different_markers[1].type);
+        t.assert_equal("third is marker", segment_type::MARKER, double_different_markers[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, double_different_markers[3].type);
+
+        t.assert_equal("first is '<hello>'", "<hello>", double_different_markers[0].value);
+        t.assert_equal("second is '[hello]'", "[hello]", double_different_markers[1].value);
+        t.assert_equal("third is '<world>'", "<world>", double_different_markers[2].value);
+        t.assert_equal("fourth is '[world]'", "[world]", double_different_markers[3].value);
+    });
+
+    t.test("in_between", [&] (testing & t) {
+        t.assert_equal("first is text", segment_type::TEXT, in_between[0].type);
+        t.assert_equal("second is marker", segment_type::MARKER, in_between[1].type);
+        t.assert_equal("third is text", segment_type::TEXT, in_between[2].type);
+        t.assert_equal("fourth is marker", segment_type::MARKER, in_between[3].type);
+        t.assert_equal("fifth is text", segment_type::TEXT, in_between[4].type);
+        t.assert_equal("sixth is marker", segment_type::MARKER, in_between[5].type);
+
+        t.assert_equal("first is 'im'", "im", in_between[0].value);
+        t.assert_equal("second is '<blue>'", "<blue>", in_between[1].value);
+        t.assert_equal("third is 'daba'", "daba", in_between[2].value);
+        t.assert_equal("fourth is '<dee>'", "<dee>", in_between[3].value);
+        t.assert_equal("fifth is 'da'", "da", in_between[4].value);
+        t.assert_equal("sixth is '[hey]'", "[hey]", in_between[5].value);
+    });
+}
+
+static void test_calculate_diff_split(testing & t) {
+    t.test("calculate_diff_split basic", test_calculate_diff_split_basic);
+    t.test("calculate_diff_split identical", test_calculate_diff_split_identical);
+    t.test("calculate_diff_split common prefix", test_calculate_diff_split_common_prefix);
+    t.test("calculate_diff_split common suffix", test_calculate_diff_split_common_suffix);
+    t.test("calculate_diff_split common both", test_calculate_diff_split_common_both);
+    t.test("calculate_diff_split empty cases", test_calculate_diff_split_empty_cases);
+    t.test("calculate_diff_split no common", test_calculate_diff_split_no_common);
+    t.test("calculate_diff_split single char", test_calculate_diff_split_single_char);
+    t.test("calculate_diff_split overlaps", test_calculate_diff_split_overlaps);
+    t.test("calculate_diff_split tag boundaries", test_calculate_diff_split_tag_boundaries);
+    t.test("calculate_diff_split generation prompt", test_calculate_diff_split_generation_prompt);
+}
+
+static void test_calculate_diff_split_basic(testing & t) {
+    diff_split result = calculate_diff_split("hello world", "hello test");
+    t.assert_equal("prefix should be 'hello '", "hello ", result.prefix);
+    t.assert_equal("left should be 'world'", "world", result.left);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("prefixA suffix", "prefixB suffix");
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be ' suffix'", " suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_identical(testing & t) {
+    diff_split result = calculate_diff_split("hello", "hello");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("<row><row><row><your><boat><gently>", "<row><row><row><your><boat><gently>");
+    t.assert_equal("prefix should be '<row><row><row><your><boat><gently>'", "<row><row><row><your><boat><gently>", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_prefix(testing & t) {
+    diff_split result = calculate_diff_split("abcdef", "abcxyz");
+    t.assert_equal("prefix should be 'abc'", "abc", result.prefix);
+    t.assert_equal("left should be 'def'", "def", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("same", "sameagain");
+    t.assert_equal("prefix should be 'same'", "same", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'again'", "again", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_common_suffix(testing & t) {
+    diff_split result = calculate_diff_split("123end", "456end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    result = calculate_diff_split("start", "end");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'start'", "start", result.left);
+    t.assert_equal("right should be 'end'", "end", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("abcsuffix", "xyzsuffix");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be 'suffix'", "suffix", result.suffix);
+}
+
+static void test_calculate_diff_split_common_both(testing & t) {
+    diff_split result = calculate_diff_split("helloXworld", "helloYworld");
+    t.assert_equal("prefix should be 'hello'", "hello", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'world'", "world", result.suffix);
+
+    result = calculate_diff_split("ABCmiddleXYZ", "ABCdifferentXYZ");
+    t.assert_equal("prefix should be 'ABC'", "ABC", result.prefix);
+    t.assert_equal("left should be 'middle'", "middle", result.left);
+    t.assert_equal("right should be 'different'", "different", result.right);
+    t.assert_equal("suffix should be 'XYZ'", "XYZ", result.suffix);
+
+    result = calculate_diff_split("startAend", "startBend");
+    t.assert_equal("prefix should be 'start'", "start", result.prefix);
+    t.assert_equal("left should be 'A'", "A", result.left);
+    t.assert_equal("right should be 'B'", "B", result.right);
+    t.assert_equal("suffix should be 'end'", "end", result.suffix);
+
+    // Edge case: common prefix and suffix overlap
+    result = calculate_diff_split("aa", "ab");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_empty_cases(testing & t) {
+    // Empty left, non-empty right
+    diff_split result = calculate_diff_split("", "hello");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'hello'", "hello", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Non-empty left, empty right
+    result = calculate_diff_split("hello", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'hello'", "hello", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Both empty
+    result = calculate_diff_split("", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Left single char, empty right
+    result = calculate_diff_split("a", "");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Empty left, right single char
+    result = calculate_diff_split("", "a");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'a'", "a", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_no_common(testing & t) {
+    diff_split result = calculate_diff_split("abc", "xyz");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'xyz'", "xyz", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("left", "right");
+    // The algorithm finds "t" as a common suffix since both strings end with 't'
+    // This is the algorithm's actual behavior - it finds maximal common suffix
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'lef'", "lef", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be 't'", "t", result.suffix);
+
+    result = calculate_diff_split("123", "456");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be '123'", "123", result.left);
+    t.assert_equal("right should be '456'", "456", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_single_char(testing & t) {
+    diff_split result = calculate_diff_split("a", "b");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'a'", "a", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("a", "ab");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'b'", "b", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("ab", "a");
+    t.assert_equal("prefix should be 'a'", "a", result.prefix);
+    t.assert_equal("left should be 'b'", "b", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_overlaps(testing & t) {
+    // One string is substring of another
+    diff_split result = calculate_diff_split("test", "testing");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be empty", "", result.left);
+    t.assert_equal("right should be 'ing'", "ing", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    result = calculate_diff_split("testing", "test");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'ing'", "ing", result.left);
+    t.assert_equal("right should be empty", "", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Similar strings with one extra char at start
+    result = calculate_diff_split("Xtest", "Ytest");
+    // The algorithm finds "test" as a common suffix since both strings end with "test"
+    // This is the algorithm's actual behavior - it finds maximal common suffix
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be 'test'", "test", result.suffix);
+
+    // Similar strings with one extra char at end
+    result = calculate_diff_split("testX", "testY");
+    t.assert_equal("prefix should be 'test'", "test", result.prefix);
+    t.assert_equal("left should be 'X'", "X", result.left);
+    t.assert_equal("right should be 'Y'", "Y", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Strings that are reverses
+    result = calculate_diff_split("abc", "cba");
+    t.assert_equal("prefix should be empty", "", result.prefix);
+    t.assert_equal("left should be 'abc'", "abc", result.left);
+    t.assert_equal("right should be 'cba'", "cba", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+}
+
+static void test_calculate_diff_split_tag_boundaries(testing & t) {
+    // Test with unclosed XML tags
+    diff_split result = calculate_diff_split("test<tag", "test>content");
+    // The fix_tag_boundaries should move incomplete tags appropriately
+    t.assert_true("prefix should start with 'test'", result.prefix.find("test") == 0);
+    t.assert_true("should handle tag boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with unclosed brackets
+    result = calculate_diff_split("test[", "test]value");
+    t.assert_true("should handle bracket boundaries", result.left != "" || result.right != "" || result.suffix != "");
+
+    // Test with partial tags on both sides
+    result = calculate_diff_split("prefix<tag>", "prefix</tag>suffix");
+    // fix_tag_boundaries moves the incomplete '<' from prefix to left/right
+    t.assert_equal("prefix should be 'prefix'", "prefix", result.prefix);
+    t.assert_equal("left should be '<tag>'", "<tag>", result.left);
+    t.assert_equal("right should be '</tag>suffix'", "</tag>suffix", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with complex nested tags
+    result = calculate_diff_split("prefix<div>content</div>", "prefix<div>different</div>");
+    // Algorithm finds "ent</div>" as a common suffix because both strings end with it
+    // This is the actual algorithm behavior, though not semantically ideal
+    t.assert_equal("prefix should be 'prefix<div>'", "prefix<div>", result.prefix);
+    t.assert_equal("left should be 'cont'", "cont", result.left);
+    t.assert_equal("right should be 'differ'", "differ", result.right);
+    t.assert_equal("suffix should be 'ent</div>'", "ent</div>", result.suffix);
+
+    // Test with unclosed angle bracket
+    result = calculate_diff_split("Hello <world>", "Hello test");
+    t.assert_equal("prefix should be 'Hello '", "Hello ", result.prefix);
+    t.assert_true("left should contain '<world>'", result.left.find("<world>") != std::string::npos);
+    t.assert_equal("right should be 'test'", "test", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test with unclosed square bracket
+    result = calculate_diff_split("test [array]", "test other");
+    t.assert_equal("prefix should be 'test '", "test ", result.prefix);
+    t.assert_true("left should contain '[array]'", result.left.find("[array]") != std::string::npos);
+    t.assert_equal("right should be 'other'", "other", result.right);
+    t.assert_equal("suffix should be empty", "", result.suffix);
+
+    // Test empty prefix and suffix with tags
+    result = calculate_diff_split("<tag>left</tag>", "<tag>righ</tag>");
+    t.assert_equal("prefix should be '<tag>'", "<tag>", result.prefix);
+    t.assert_equal("left should be 'left'", "left", result.left);
+    t.assert_equal("right should be 'righ'", "righ", result.right);
+    t.assert_equal("suffix should be '</tag>'", "</tag>", result.suffix);
+
+    {
+        // real case from template tests, simplified
+        std::string left  = "PREFIX</think>Sure";
+        std::string right = "PREFIX<think>Lemme think</think>Sure";
+        result            = calculate_diff_split(left, right);
+        t.assert_equal("prefix should be PREFIX", "PREFIX", result.prefix);
+        t.assert_equal("suffix should be </think>Sure", "</think>Sure", result.suffix);
+        t.assert_equal("left should be empty", "", result.left);
+        t.assert_equal("right should be <think>Lemme think", "<think>Lemme think", result.right);
+    }
+
+    {
+        // Real case: special tokens with |> boundary issue
+        // The suffix starts with |> which should be moved to complete <|END_RESPONSE and <|END_ACTION
+        std::string prefix    = "SOME_PREFIX";
+        std::string suffix    = "|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
+        std::string left_diff = "<|START_RESPONSE|>Let me help you.<|END_RESPONSE";
+        std::string right_diff =
+            "<|START_THINKING|><|END_THINKING|><|START_ACTION|>[\n"
+            "    {\"tool_call_id\": \"0\", \"tool_name\": \"test_function_name\", "
+            "\"parameters\": {\"param1\": \"value1\", \"param2\": \"value2\"}}\n"
+            "]<|END_ACTION";
+
+        std::string left  = prefix + left_diff + suffix;
+        std::string right = prefix + right_diff + suffix;
+        result            = calculate_diff_split(left, right);
+
+        t.assert_equal("special token prefix", prefix, result.prefix);
+        // The |> should be moved from suffix to complete the tokens
+        t.assert_equal("special token left", "<|START_RESPONSE|>Let me help you.<|END_RESPONSE|>", result.left);
+        t.assert_true("special token right ends with |>", result.right.find("<|END_ACTION|>") != std::string::npos);
+        t.assert_equal("special token suffix", "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
+                       result.suffix);
+    }
+}
+
+static void test_calculate_diff_split_generation_prompt(testing & t) {
+    // ChatML thinking template: left is a prefix of right, generation_prompt is the appended part.
+    // The trailing \n in left matches the trailing \n in the generation_prompt, causing
+    // the suffix matcher to steal it and rotate the diff result.
+    {
+        // Simplified reproduction: left ends with \n, right = left + "<|im_start|>assistant\n<think>\n"
+        std::string left  = "<|im_start|>user\nHello<|im_end|>\n";
+        std::string right = left + "<|im_start|>assistant\n<think>\n";
+        diff_split result = calculate_diff_split(left, right);
+        t.assert_equal("chatml prefix", left, result.prefix);
+        t.assert_equal("chatml left", "", result.left);
+        t.assert_equal("chatml right should be generation prompt",
+                       "<|im_start|>assistant\n<think>\n", result.right);
+        t.assert_equal("chatml suffix", "", result.suffix);
+    }
+
+    {
+        // More realistic: longer conversation ending with tool_response
+        std::string common =
+            "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
+            "<|im_start|>user\nSearch for files<|im_end|>\n"
+            "<|im_start|>assistant\n<think>\nLet me search.\n</think>\n\n"
+            "<tool_call>\n<function=search>\n</function>\n</tool_call><|im_end|>\n"
+            "<|im_start|>user\n<tool_response>\nNo files found\n</tool_response><|im_end|>\n";
+        std::string left  = common;
+        std::string right = common + "<|im_start|>assistant\n<think>\n";
+        diff_split result = calculate_diff_split(left, right);
+        t.assert_equal("tool_response left", "", result.left);
+        t.assert_equal("tool_response right should be generation prompt",
+                       "<|im_start|>assistant\n<think>\n", result.right);
+    }
+}
+
+static void test_until_common_prefix(testing & t) {
+    t.test("until_common_prefix basic", test_until_common_prefix_basic);
+}
+
+static void test_until_common_prefix_basic(testing & t) {
+    // Test case from the user request
+    std::string result = until_common_prefix("<function name=foo><arg name=bar>", "<arg name=bar>", "<arg name=baz>");
+    t.assert_equal("untilCommonPrefix should return '<function name=foo>'", "<function name=foo>", result);
+
+    // Additional test cases to ensure robustness
+    // Test with different common prefix lengths
+    result = until_common_prefix("prefix<test>suffix", "<test>different", "<test>other");
+    t.assert_equal("should return 'prefix'", "prefix", result);
+
+    // Test when common prefix is at the start
+    result = until_common_prefix("<common>rest", "<common>left", "<common>right");
+    t.assert_equal("should return empty string when common prefix at start", "", result);
+
+    // Test when there's no common prefix
+    result = until_common_prefix("something", "left", "right");
+    t.assert_equal("should return empty string when no common prefix", "", result);
+
+    // Test with empty strings
+    result = until_common_prefix("test", "", "right");
+    t.assert_equal("should return empty string when left is empty", "", result);
+
+    // Test with longer common prefix
+    result = until_common_prefix("abcXYZ<shared_prefix>rest", "<shared_prefix>left", "<shared_prefix>right");
+    t.assert_equal("should return 'abcXYZ'", "abcXYZ", result);
+}
+
+static void test_after_common_suffix(testing & t) {
+    t.test("after_common_suffix basic", test_after_common_suffix_basic);
+}
+
+static void test_after_common_suffix_basic(testing & t) {
+    // Test case from the user request
+    std::string result = after_common_suffix("<function name=foo><arg name=bar>100</arg></function>",
+                                            "<arg name=bar>100</arg>",
+                                            "<arg name=baz>535</arg>");
+    t.assert_equal("afterCommonSuffix should return '</function>'", "</function>", result);
+
+    // Test when common suffix is at the end
+    result = after_common_suffix("rest<common>", "left<common>", "right<common>");
+    t.assert_equal("should return empty string when common suffix at end", "", result);
+
+    // Test with empty strings
+    result = after_common_suffix("test", "left", "");
+    t.assert_equal("should return empty string when right is empty", "", result);
+
+    // Test case with XML-like structure similar to the main example
+    result = after_common_suffix("<outer><inner>value</inner></outer>",
+                                "<inner>value</inner>",
+                                "<inner>different</inner>");
+    t.assert_equal("should return '</outer>'", "</outer>", result);
+
+    // Test with longer common suffix appearing at the end of full
+    result = after_common_suffix("prefix<shared>rest</shared>", "prefix<shared>left</shared>", "prefix<shared>right</shared>");
+    t.assert_equal("should return '' when common suffix is at end of full", "", result);
+
+    // Test with common suffix appearing in middle but not at end
+    result = after_common_suffix("<tag>content</tag><extra>", "<tag>value</tag>", "<tag>other</tag>");
+    t.assert_equal("should return '<extra>' when common suffix appears before end", "<extra>", result);
+
+    // Test with multi-character common suffix at the very end of full
+    result = after_common_suffix("start<middle>end</middle>", "prefix<middle>left</middle>", "prefix<middle>right</middle>");
+    t.assert_equal("should return '' when common suffix </middle> is at end of full", "", result);
+}
+
+static void test_compare_variants(testing & t) {
+    t.test("compare_variants basic", test_compare_variants_basic);
+    t.test("compare_variants messages modifier", test_compare_variants_messages_modifier);
+    t.test("compare_variants tools modifier", test_compare_variants_tools_modifier);
+    t.test("compare_variants both modifiers", test_compare_variants_both_modifiers);
+    t.test("compare_variants template failure", test_compare_variants_template_failure);
+    t.test("compare_variants identity", test_compare_variants_identity);
+}
+
+static void test_compare_variants_basic(testing & t) {
+    // Create a simple template that just echoes messages
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    // The template might not output anything if messages is empty or format is different
+    // Check that we get a valid result
+    t.assert_true("prefix or left should have content", !result->diff.prefix.empty() || !result->diff.left.empty());
+}
+
+static void test_compare_variants_messages_modifier(testing & t) {
+    // Test with messages modifier only
+    common_chat_template tmpl("{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+    };
+
+    std::optional<compare_variants_result> result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'A'", "A", result->diff.left);
+    t.assert_equal("right should be 'B'", "B", result->diff.right);
+}
+
+static void test_compare_variants_tools_modifier(testing & t) {
+    // Test with tools modifier only
+    common_chat_template tmpl(
+        "{% for tool in tools %}{{ tool['name'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.tools = json::array({
+        json {{"name", "foo"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.tools[0]["name"] = "bar";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'foo'", "foo", result->diff.left);
+    t.assert_equal("right should be 'bar'", "bar", result->diff.right);
+}
+
+static void test_compare_variants_both_modifiers(testing & t) {
+    // Test with both messages and tools modifiers using the for loop approach
+    common_chat_template tmpl(
+        "{% for message in messages %}{{ message['role'] }}:{{ message['content'] }}{% endfor %}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "A"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "B";
+        p.messages[0]["role"] = "newuser";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("left should be 'user:A'", "user:A", result->diff.left);
+    t.assert_equal("right should be 'newuser:B'", "newuser:B", result->diff.right);
+}
+
+static void test_compare_variants_template_failure(testing & t) {
+    // Test with template that causes failure during application (not construction)
+    // We use a valid template syntax but one that will fail during application
+    common_chat_template tmpl("{{ messages[0]['nonexistent_field'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    auto modifier = [](template_params & p) {
+        p.messages[0]["content"] = "World";
+    };
+
+    auto result = ::compare_variants(tmpl, params, modifier);
+
+    t.assert_true("result should be nullopt on template failure", !result.has_value());
+}
+
+static void test_compare_variants_identity(testing & t) {
+    // Test with identity modifier (no change)
+    common_chat_template tmpl("{{ messages[0]['content'] }}", "", "");
+
+    template_params params;
+    params.messages = json::array({
+        json {{"role", "user"}, {"content", "Hello"}}
+    });
+
+    // No modifier - should use identity
+    auto result = ::compare_variants(tmpl, params, nullptr);
+
+    if (!t.assert_true("result should have value", result.has_value())) {
+        return;
+    }
+    t.assert_equal("prefix should be 'Hello'", "Hello", result->diff.prefix);
+    t.assert_equal("left should be empty", "", result->diff.left);
+    t.assert_equal("right should be empty", "", result->diff.right);
+    t.assert_equal("suffix should be empty", "", result->diff.suffix);
+}
+
+// ============================================================================
+// Seed-OSS Template Tool Calling Analysis Tests
+// ============================================================================
+
+static void test_seed_oss_tool_analysis(testing & t) {
+    t.test("Seed-OSS tool presence", test_seed_oss_tool_presence);
+    t.test("Seed-OSS call count", test_seed_oss_call_count);
+    t.test("Seed-OSS function names", test_seed_oss_function_names);
+    t.test("Seed-OSS argument count", test_seed_oss_argument_count);
+    t.test("Seed-OSS args presence", test_seed_oss_args_presence);
+    t.test("Seed-OSS tool with reasoning", test_seed_oss_tool_with_reasoning);
+}
+
+// Helper to load Seed-OSS template
+static common_chat_template load_seed_oss_template(testing & t) {
+    std::string template_path = "models/templates/ByteDance-Seed-OSS.jinja";
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Seed-OSS template loaded successfully", template_source.length() > 0);
+    return tmpl;
+}
+
+// Helper to build tool call JSON
+static json build_tool_call(const std::string & name, const json & args, const std::string & id = "call_001") {
+    return json{
+        {"id", id},
+        {"type", "function"},
+        {"function", json{
+            {"name", name},
+            {"arguments", args}
+        }}
+    };
+}
+
+// Helper to build tools definition
+static json build_tools_definition() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["param1"] = json::object({
+        {"type", "string"},
+        {"description", "First parameter"}
+    });
+    parameters_schema["properties"]["param2"] = json::object({
+        {"type", "string"},
+        {"description", "Second parameter"}
+    });
+    parameters_schema["required"] = json::array({"param1", "param2"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_function_name"},
+                {"description", "A test function for debugging"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// T1: Compare with/without tool call (user, assistant)
+static void test_seed_oss_tool_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_no_tools = json{
+        {"role", "assistant"},
+        {"content", "Let me help you."}
+    };
+
+    json assistant_with_tools = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_no_tools;
+    params_no_tools.messages = json::array({user_msg, assistant_no_tools});
+    params_no_tools.tools = build_tools_definition();
+    params_no_tools.add_generation_prompt = false;
+    params_no_tools.enable_thinking = true;
+
+    template_params params_with_tools;
+    params_with_tools.messages = json::array({user_msg, assistant_with_tools});
+    params_with_tools.tools = build_tools_definition();
+    params_with_tools.add_generation_prompt = false;
+    params_with_tools.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_no_tools,
+        [&](template_params & p) {
+            p.messages = params_with_tools.messages;
+        });
+
+    if (!t.assert_true("T1 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+    t.assert_true("T1 prefix should contain system", diff.prefix.find("system") != std::string::npos);
+    t.assert_true("T1 prefix should contain user", diff.prefix.find("user") != std::string::npos);
+    t.assert_true("T1 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Left should be the assistant content without tool
+    t.assert_equal("T1 left should contain 'Let me help you.'", "Let me help you.", diff.left);
+
+    // Right should contain the tool call markers
+    t.assert_true("T1 right should contain tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T1 right should contain function tag", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param1", diff.right.find("<parameter=param1>") != std::string::npos);
+    t.assert_true("T1 right should contain parameter=param2", diff.right.find("<parameter=param2>") != std::string::npos);
+    t.assert_true("T1 right should contain value1", diff.right.find("value1") != std::string::npos);
+    t.assert_true("T1 right should contain value2", diff.right.find("value2") != std::string::npos);
+    t.assert_true("T1 right should contain tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should be the eos token
+    t.assert_equal("T1 suffix should be '<seed:eos>'", "<seed:eos>", diff.suffix);
+}
+
+// T2: Compare one vs two tool calls
+static void test_seed_oss_call_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_one_call = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_two_calls = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}})),
+            build_tool_call("test_function_name", json::object({{"param1", "value3"}, {"param2", "value4"}}), "call_002")
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_call});
+    params_one.tools = build_tools_definition();
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_calls});
+        });
+
+    if (!t.assert_true("T2 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    // Prefix should include the first tool call
+    t.assert_true("T2 prefix should contain first tool_call begin", diff.prefix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 prefix should contain first function", diff.prefix.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 prefix should contain value1", diff.prefix.find("value1") != std::string::npos);
+    t.assert_true("T2 prefix should contain value2", diff.prefix.find("value2") != std::string::npos);
+    t.assert_true("T2 prefix should contain first tool_call end", diff.prefix.find("</seed:tool_call>") != std::string::npos);
+
+    // Left should be empty (no second tool call in variant A)
+    t.assert_equal("T2 left should be empty", "", diff.left);
+
+    // Right should contain the second tool call
+    t.assert_true("T2 right should contain second tool_call begin", diff.right.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T2 right should contain second function", diff.right.find("<function=test_function_name>") != std::string::npos);
+    t.assert_true("T2 right should contain value3", diff.right.find("value3") != std::string::npos);
+    t.assert_true("T2 right should contain value4", diff.right.find("value4") != std::string::npos);
+    t.assert_true("T2 right should contain second tool_call end", diff.right.find("</seed:tool_call>") != std::string::npos);
+
+    // Suffix should end with the eos token
+    t.assert_equal("T2 suffix should end with '<seed:eos>'", "<seed:eos>", diff.suffix.substr(diff.suffix.length() - 10, 10));
+}
+
+// T3: Compare different function names
+static void test_seed_oss_function_names(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with two different function names
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    parameters_schema["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_alpha"},
+                {"description", "First function"},
+                {"parameters", parameters_schema}
+            }}
+        },
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "func_beta"},
+                {"description", "Second function"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+
+    json assistant_func_alpha = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_alpha", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json assistant_func_beta = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("func_beta", json::object({{"arg1", "test_value"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_alpha;
+    params_alpha.messages = json::array({user_msg, assistant_func_alpha});
+    params_alpha.tools = tools;
+    params_alpha.add_generation_prompt = false;
+    params_alpha.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_alpha,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_func_beta});
+        });
+
+    if (!t.assert_true("T3 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    bool func_alpha_in_left = diff.left.find("func_alpha") != std::string::npos;
+    bool func_alpha_in_prefix = diff.prefix.find("func_alpha") != std::string::npos;
+    bool func_beta_in_right = diff.right.find("func_beta") != std::string::npos;
+    bool func_beta_in_prefix = diff.prefix.find("func_beta") != std::string::npos;
+    bool func_beta_in_suffix = diff.suffix.find("func_beta") != std::string::npos;
+
+    // Left should contain func_alpha (or be in prefix)
+    t.assert_true("T3 left should contain func_alpha (or prefix)", func_alpha_in_left || func_alpha_in_prefix);
+
+    // Right should contain func_beta
+    t.assert_true("T3 right should contain func_beta", func_beta_in_right || func_beta_in_prefix || func_beta_in_suffix);
+
+    // Both should have the same parameter value (in common parts, not in diffs)
+    // Since both have same args, test_value will be in prefix/suffix
+    t.assert_true("T3 diff should contain test_value (in prefix or suffix)",
+        diff.prefix.find("test_value") != std::string::npos || diff.suffix.find("test_value") != std::string::npos);
+}
+
+// T4: Compare different argument counts (zero, one, two parameters)
+static void test_seed_oss_argument_count(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    // Build tools with 0, 1, or 2 required parameters
+    json params_2_required = json::object();
+    params_2_required["type"] = "object";
+    params_2_required["properties"] = json::object();
+    params_2_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_2_required["properties"]["arg2"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 2"}
+    });
+    params_2_required["required"] = json::array({"arg1", "arg2"});
+
+    json params_1_required = json::object();
+    params_1_required["type"] = "object";
+    params_1_required["properties"] = json::object();
+    params_1_required["properties"]["arg1"] = json::object({
+        {"type", "string"},
+        {"description", "Argument 1"}
+    });
+    params_1_required["required"] = json::array({"arg1"});
+
+    json tools = json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "test_func"},
+                {"description", "Test function"},
+                {"parameters", params_2_required}
+            }}
+        }
+    });
+
+    // Test: zero args vs one arg
+    json assistant_zero_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object())
+        })}
+    };
+
+    json assistant_one_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}}))
+        })}
+    };
+
+    json assistant_two_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_func", json::object({{"arg1", "value1"}, {"arg2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    // Test zero vs one
+    template_params params_zero;
+    params_zero.messages = json::array({user_msg, assistant_zero_args});
+    params_zero.tools = tools;
+    params_zero.add_generation_prompt = false;
+    params_zero.enable_thinking = true;
+
+    auto result_zero_one = ::compare_variants(tmpl, params_zero,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_one_arg});
+        });
+
+    if (!t.assert_true("T4 zero vs one result should have value", result_zero_one.has_value())) {
+        return;
+    }
+    t.assert_true("T4 zero vs one left should be empty or minimal", result_zero_one->diff.left.empty() || result_zero_one->diff.left == "");
+    t.assert_true("T4 zero vs one right should contain arg1", result_zero_one->diff.right.find("arg1") != std::string::npos);
+
+    // Test one vs two
+    template_params params_one;
+    params_one.messages = json::array({user_msg, assistant_one_arg});
+    params_one.tools = tools;
+    params_one.add_generation_prompt = false;
+    params_one.enable_thinking = true;
+
+    auto result_one_two = ::compare_variants(tmpl, params_one,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_two_args});
+        });
+
+    if (!t.assert_true("T4 one vs two result should have value", result_one_two.has_value())) {
+        return;
+    }
+
+    const auto & diff4 = result_one_two->diff;
+    t.assert_true("T4 one vs two left should contain arg1 (or prefix)",
+        diff4.left.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg1 (or prefix)",
+        diff4.right.find("arg1") != std::string::npos || diff4.prefix.find("arg1") != std::string::npos);
+    t.assert_true("T4 one vs two right should contain arg2 (or prefix/suffix)",
+        diff4.right.find("arg2") != std::string::npos || diff4.prefix.find("arg2") != std::string::npos || diff4.suffix.find("arg2") != std::string::npos);
+}
+
+// T5: Compare different argument values
+static void test_seed_oss_args_presence(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_same_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}}))
+        })}
+    };
+
+    json assistant_other_arg = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_both_args = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello"}
+    };
+
+    template_params params_same;
+    params_same.messages = json::array({user_msg, assistant_same_arg});
+    params_same.tools = build_tools_definition();
+    params_same.add_generation_prompt = false;
+    params_same.enable_thinking = true;
+
+    // Test same arg vs other arg
+    auto result_same_other = ::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_other_arg});
+        });
+
+    if (!t.assert_true("T5 same vs other result should have value", result_same_other.has_value())) {
+        return;
+    }
+    const auto & diff5a = result_same_other->diff;
+    t.assert_true("T5 same vs other left should contain param1 (or prefix/suffix)",
+        diff5a.left.find("param1") != std::string::npos || diff5a.prefix.find("param1") != std::string::npos || diff5a.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs other left should contain value1 (or prefix/suffix)",
+        diff5a.left.find("value1") != std::string::npos || diff5a.prefix.find("value1") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain param2 (or prefix/suffix)",
+        diff5a.right.find("param2") != std::string::npos || diff5a.prefix.find("param2") != std::string::npos || diff5a.suffix.find("param2") != std::string::npos);
+    t.assert_true("T5 same vs other right should contain value2 (or prefix/suffix)",
+        diff5a.right.find("value2") != std::string::npos || diff5a.prefix.find("value2") != std::string::npos || diff5a.suffix.find("value2") != std::string::npos);
+
+    // Test same arg vs both args
+    auto result_same_both = ::compare_variants(tmpl, params_same,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_both_args});
+        });
+
+    if (!t.assert_true("T5 same vs both result should have value", result_same_both.has_value())) {
+        return;
+    }
+    const auto & diff5b = result_same_both->diff;
+    t.assert_true("T5 same vs both left should contain param1 (or prefix/suffix)",
+        diff5b.left.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param1 (or prefix/suffix)",
+        diff5b.right.find("param1") != std::string::npos || diff5b.prefix.find("param1") != std::string::npos || diff5b.suffix.find("param1") != std::string::npos);
+    t.assert_true("T5 same vs both right should contain param2 (or prefix/suffix)",
+        diff5b.right.find("param2") != std::string::npos || diff5b.prefix.find("param2") != std::string::npos || diff5b.suffix.find("param2") != std::string::npos);
+}
+
+// T6: Tool call with vs without reasoning_content
+static void test_seed_oss_tool_with_reasoning(testing & t) {
+    common_chat_template tmpl = load_seed_oss_template(t);
+
+    json assistant_tool_only = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })}
+    };
+
+    json assistant_tool_with_reasoning = json{
+        {"role", "assistant"},
+        {"content", nullptr},
+        {"tool_calls", json::array({
+            build_tool_call("test_function_name", json::object({{"param1", "value1"}, {"param2", "value2"}}))
+        })},
+        {"reasoning_content", "I need to call the tool first."}
+    };
+
+    json user_msg = json{
+        {"role", "user"},
+        {"content", "Hello, please help me."}
+    };
+
+    template_params params_tool_only;
+    params_tool_only.messages = json::array({user_msg, assistant_tool_only});
+    params_tool_only.tools = build_tools_definition();
+    params_tool_only.add_generation_prompt = false;
+    params_tool_only.enable_thinking = true;
+
+    auto result = ::compare_variants(tmpl, params_tool_only,
+        [&](template_params & p) {
+            p.messages = json::array({user_msg, assistant_tool_with_reasoning});
+        });
+
+    if (!t.assert_true("T6 result should have value", result.has_value())) {
+        return;
+    }
+
+    const auto & diff = result->diff;
+
+    // Left should be empty (no reasoning in variant A)
+    t.assert_equal("T6 left should be empty", "", diff.left);
+
+    // Right should contain the thinking token with reasoning content
+    t.assert_true("T6 right should contain think begin", diff.right.find("<seed:think>") != std::string::npos);
+    t.assert_true("T6 right should contain reasoning content", diff.right.find("I need to call the tool first.") != std::string::npos);
+    t.assert_true("T6 right should contain think end", diff.right.find("</seed:think>") != std::string::npos);
+
+    // Prefix should contain the assistant role
+    t.assert_true("T6 prefix should contain assistant", diff.prefix.find("assistant") != std::string::npos);
+
+    // Suffix should contain the tool call
+    t.assert_true("T6 suffix should contain tool_call begin", diff.suffix.find("<seed:tool_call>") != std::string::npos);
+    t.assert_true("T6 suffix should contain function name", diff.suffix.find("test_function_name") != std::string::npos);
+    t.assert_true("T6 suffix should contain eos", diff.suffix.find("<seed:eos>") != std::string::npos);
+}
+
+static common_chat_template load_template(testing & t, const std::string & template_path) {
+    std::ifstream fin(template_path, std::ios::binary);
+    std::ostringstream buf;
+    if (fin.is_open()) {
+        buf << fin.rdbuf();
+    }
+    std::string template_source = buf.str();
+    common_chat_template tmpl(template_source, "", "");
+    t.assert_true("Nemotron template loaded successfully", template_source.length() > 0);
+    return tmpl;
+}
+
+// ============================================================================
+// Nemotron Template Analysis Tests
+// ============================================================================
+static common_chat_template load_nemotron_template(testing & t) {
+    return load_template(t, "models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
+}
+
+static void test_nemotron_analysis(testing & t) {
+    t.test("Nemotron reasoning detection", test_nemotron_reasoning_detection);
+    t.test("Nemotron tool format", test_nemotron_tool_format);
+}
+
+static void test_nemotron_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Test the comparison manually to see what's happening
+    json user_msg = json{ { "role", "user" }, { "content", "Hello" } };
+    json assistant_no_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." }
+    };
+    json assistant_with_reasoning = json{
+        { "role", "assistant" },
+        { "content", "I can help." },
+        { "reasoning_content", "Let me think about this." }
+    };
+
+    template_params params;
+    params.messages = json::array({ user_msg, assistant_no_reasoning });
+    params.add_generation_prompt = false;
+    params.enable_thinking = true;
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check reasoning markers
+    t.assert_equal("reasoning_start should be '<think>\\n'", "<think>\n", analysis.reasoning.start);
+    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
+
+    // Check reasoning mode detection
+    // Nemotron uses tag-based reasoning; prefill handles the template's forced markers
+    t.assert_equal("reasoning should be TAG_BASED", reasoning_mode::TAG_BASED, analysis.reasoning.mode);
+
+    // Make sure reasoning markers don't spill over to content markers
+    t.assert_equal("content start should be empty", "", analysis.content.start);
+    t.assert_equal("content end should be empty", "", analysis.content.end);
+
+    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode);
+}
+
+static void test_nemotron_tool_format(testing & t) {
+    common_chat_template tmpl = load_nemotron_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check tool markers - Nemotron uses per-call wrapping (each call individually wrapped)
+    t.assert_equal("tool_section_start should be empty (per-call format)", "", analysis.tools.format.section_start);
+    t.assert_equal("tool_section_end should be empty (per-call format)", "", analysis.tools.format.section_end);
+    t.assert_equal("per_call_start should be '<tool_call>\\n'", "<tool_call>\n", analysis.tools.format.per_call_start);
+    t.assert_equal("per_call_end should be '</tool_call>'", "</tool_call>", analysis.tools.format.per_call_end);
+    t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+
+    // Check function markers
+    t.assert_equal("func_name_prefix should be '<function='", "<function=", analysis.tools.function.name_prefix);
+    t.assert_equal("func_name_suffix should be '>\\n'", ">\n", analysis.tools.function.name_suffix);
+    t.assert_equal("func_close should be '</function>\\n'", "</function>\n", analysis.tools.function.close);
+
+    // Check argument markers (note: markers retain trailing newlines for proper parsing)
+    t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
+    t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
+    t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
+
+    // Check format classification
+    t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
+
+    // Verify tool support
+    t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+}
+
+static common_chat_template load_cohere_template(testing & t) {
+    return load_template(t, "models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
+}
+
+static void test_cohere_analysis(testing & t) {
+    t.test("Cohere reasoning detection", test_cohere_reasoning_detection);
+}
+
+static void test_cohere_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check reasoning markers - Cohere uses special token format
+    t.assert_equal("reasoning_start should be '<|START_THINKING|>'", "<|START_THINKING|>", analysis.reasoning.start);
+    t.assert_equal("reasoning_end should be '<|END_THINKING|>'", "<|END_THINKING|>", analysis.reasoning.end);
+
+    // Check reasoning mode - Cohere only shows reasoning with tool calls (TOOLS_ONLY)
+    t.assert_equal("reasoning should be TOOLS_ONLY", reasoning_mode::TOOLS_ONLY, analysis.reasoning.mode);
+
+    // Check content markers - Cohere wraps all content with START/END_RESPONSE
+    t.assert_equal("content_start should be '<|START_RESPONSE|>'", "<|START_RESPONSE|>", analysis.content.start);
+    t.assert_equal("content_end should be '<|END_RESPONSE|>'", "<|END_RESPONSE|>", analysis.content.end);
+
+    // Content is always wrapped (both with and without tools)
+    t.assert_equal("content should be ALWAYS_WRAPPED", content_mode::ALWAYS_WRAPPED, analysis.content.mode);
+}
+
+static void test_tool_format_cohere(testing & t) {
+    common_chat_template tmpl = load_cohere_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // Check tool section markers - Cohere uses ACTION markers
+    t.assert_equal("tool_section_start should be '<|START_ACTION|>'", "<|START_ACTION|>", analysis.tools.format.section_start);
+    t.assert_equal("tool_section_end should be '<|END_ACTION|>'", "<|END_ACTION|>", analysis.tools.format.section_end);
+
+    // JSON_NATIVE format has no per-call markers
+    t.assert_equal("per_call_start should be empty", "", analysis.tools.format.per_call_start);
+    t.assert_equal("per_call_end should be empty", "", analysis.tools.format.per_call_end);
+
+    // JSON_NATIVE format has empty function markers (no XML-style markers)
+    t.assert_equal("func_name_prefix should be empty", "", analysis.tools.function.name_prefix);
+    t.assert_equal("func_name_suffix should be empty", "", analysis.tools.function.name_suffix);
+    t.assert_equal("func_close should be empty", "", analysis.tools.function.close);
+
+    // JSON_NATIVE format has empty args markers
+    t.assert_equal("args_start should be empty", "", analysis.tools.arguments.start);
+    t.assert_equal("args_end should be empty", "", analysis.tools.arguments.end);
+
+    // JSON_NATIVE format has empty argument markers
+    t.assert_equal("arg_name_prefix should be empty", "", analysis.tools.arguments.name_prefix);
+    t.assert_equal("arg_name_suffix should be empty", "", analysis.tools.arguments.name_suffix);
+    t.assert_equal("arg_value_prefix should be empty", "", analysis.tools.arguments.value_prefix);
+    t.assert_equal("arg_value_suffix should be empty", "", analysis.tools.arguments.value_suffix);
+    t.assert_equal("arg_separator should be empty", "", analysis.tools.arguments.separator);
+
+    // Check JSON field names - Cohere uses non-standard names
+    t.assert_equal("name_field should be 'tool_name'", "tool_name", analysis.tools.format.name_field);
+    t.assert_equal("args_field should be 'parameters'", "parameters", analysis.tools.format.args_field);
+    // This isn't a real tool call id field, i.e. with the OpenAI tool call ID format
+    t.assert_equal("id_field should be 'tool_call_id'", "", analysis.tools.format.id_field);
+
+    // Check format classification
+    t.assert_equal("tool format should be JSON_NATIVE", tool_format::JSON_NATIVE, analysis.tools.format.mode);
+
+    // Check flags
+    t.assert_true("should support tools", analysis.jinja_caps.supports_tools);
+    t.assert_true("should support parallel calls", analysis.jinja_caps.supports_parallel_tool_calls);
+    t.assert_true("should not require nonnull content", !analysis.content.requires_nonnull_content);
+    t.assert_true("tools_array_wrapped should be true", analysis.tools.format.tools_array_wrapped);
+}
+
+// ============================================================================
+// SmolLM3 Template Analysis Tests
+// Tests for templates that change system message when enable_thinking flips
+// and prefill an empty <think></think> block in no-think mode.
+// ============================================================================
+static common_chat_template load_smollm3_template(testing & t) {
+    return load_template(t, "models/templates/HuggingFaceTB-SmolLM3-3B.jinja");
+}
+
+static void test_smollm3_reasoning_detection(testing & t);
+
+static void test_smollm3_analysis(testing & t) {
+    t.test("SmolLM3 reasoning detection", test_smollm3_reasoning_detection);
+}
+
+static void test_smollm3_reasoning_detection(testing & t) {
+    common_chat_template tmpl = load_smollm3_template(t);
+
+    // Run differential analysis
+    struct autoparser analysis;
+    analysis.analyze_template(tmpl);
+
+    // SmolLM3 uses <think>/<think> reasoning tags.
+    // The template changes the entire system message when enable_thinking flips,
+    // so the analyzer must compare isolated generation prompts (not full outputs).
+    t.assert_equal("reasoning_start should be '<think>'", "<think>", analysis.reasoning.start);
+    t.assert_equal("reasoning_end should be '</think>'", "</think>", analysis.reasoning.end);
+    t.assert_equal("reasoning should be TAG_BASED", reasoning_mode::TAG_BASED, analysis.reasoning.mode);
+
+    // Content should remain plain (no wrappers)
+    t.assert_equal("content start should be empty", "", analysis.content.start);
+    t.assert_equal("content end should be empty", "", analysis.content.end);
+    t.assert_equal("content should be PLAIN", content_mode::PLAIN, analysis.content.mode);
+
+    // Preserved tokens should include the reasoning markers
+    bool has_think_start = std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "<think>") != analysis.preserved_tokens.end();
+    bool has_think_end = std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "</think>") != analysis.preserved_tokens.end();
+    t.assert_true("preserved_tokens should contain '<think>'", has_think_start);
+    t.assert_true("preserved_tokens should contain '</think>'", has_think_end);
+}
+
+// ============================================================================
+// standard_json_tools Format Tests
+// ============================================================================
+
+// Helper to build tools definition for tests
+static json build_test_tools() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["location"] = json::object({
+        {"type", "string"},
+        {"description", "The city and state"}
+    });
+    parameters_schema["properties"]["unit"] = json::object({
+        {"type", "string"},
+        {"description", "Temperature unit"},
+        {"enum", json::array({"celsius", "fahrenheit"})}
+    });
+    parameters_schema["required"] = json::array({"location"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "get_current_weather"},
+                {"description", "Get the current weather in a given location"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+static void test_standard_json_tools_formats(testing & t) {
+    t.test("OpenAI format", test_standard_json_tools_openai);
+    t.test("Cohere format", test_standard_json_tools_cohere);
+    t.test("function-as-key format", test_standard_json_tools_function_key);
+}
+
+// Test 1: OpenAI Standard Format
+// {"id": "call_abc", "function": {"name": "get_weather", "arguments": {"location": "NYC"}}}
+static void test_standard_json_tools_openai(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_call>", "</tool_call>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "function.name",
+            /* args_key */ "function.arguments",
+            /* array_wrapped */ false,
+            /* function_is_key */ false,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_call>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me check the weather."
+        "<tool_call>"
+        R"({"id": "call_abc123", "function": {"name": "get_current_weather", "arguments": {"location": "NYC"}}})"
+        "</tool_call>";
+
+    common_peg_parse_context ctx(input);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call_abc123", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me check the weather") != std::string::npos);
+}
+
+// Test 2: Cohere Format
+// {"tool_call_id": 0, "tool_name": "get_weather", "parameters": {"location": "NYC"}}
+static void test_standard_json_tools_cohere(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<|START_ACTION|>[", "]<|END_ACTION|>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "tool_name",
+            /* args_key */ "parameters",
+            /* array_wrapped */ false,  // Brackets are part of section markers
+            /* function_is_key */ false,
+            /* call_id_key */ "",
+            /* gen_call_id_key */ "tool_call_id",
+            /* parameters_order */ {"tool_call_id", "tool_name", "parameters"}
+        );
+        return p.content(p.until("<|START_ACTION|>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "Let me search for that."
+        "<|START_ACTION|>["
+        R"({"tool_call_id": 0, "tool_name": "get_current_weather", "parameters": {"location": "NYC", "unit": "celsius"}})"
+        "]<|END_ACTION|>";
+
+    common_peg_parse_context ctx(input);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "0", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("Let me search") != std::string::npos);
+}
+
+// Test 3: Function-as-Key Format
+// {"get_current_weather": {"id": "call-0001", "args": {"location": "NYC"}}}
+static void test_standard_json_tools_function_key(testing & t) {
+    json tools = build_test_tools();
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto tool_call = p.standard_json_tools(
+            "<tool_calls>[", "]</tool_calls>", tools,
+            /* parallel */ true,
+            /* force */ false,
+            /* name_key */ "",  // Name is the key itself
+            /* args_key */ "args",
+            /* array_wrapped */ false,
+            /* function_is_key */ true,
+            /* call_id_key */ "id",
+            /* gen_call_id_key */ "",
+            /* parameters_order */ {}
+        );
+        return p.content(p.until("<tool_calls>")) + p.optional(tool_call) + p.end();
+    });
+
+    std::string input =
+        "I'll call the weather function."
+        "<tool_calls>["
+        R"({"get_current_weather": {"id": "call-0001", "args": {"location": "NYC", "unit": "celsius"}}})"
+        "]</tool_calls>";
+
+    common_peg_parse_context ctx(input);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+        t.assert_equal("tool id", "call-0001", msg.tool_calls[0].id);
+    }
+    t.assert_true("content present", msg.content.find("I'll call the weather") != std::string::npos);
+}
+
+// ============================================================================
+// normalize_quotes_to_json Tests
+// ============================================================================
+
+// Copy of the function for isolated testing (original is static in chat-peg-parser.cpp)
+static std::string normalize_quotes_to_json(const std::string & input) {
+    std::string result;
+    result.reserve(input.size() + 16);
+
+    bool in_single_quoted = false;
+    bool in_double_quoted = false;
+
+    for (size_t i = 0; i < input.size(); ++i) {
+        char c = input[i];
+
+        if (c == '\\' && i + 1 < input.size()) {
+            char next = input[i + 1];
+
+            if (in_single_quoted) {
+                if (next == '\'') {
+                    result += '\'';
+                    ++i;
+                    continue;
+                }
+                if (next == '"') {
+                    result += "\\\"";
+                    ++i;
+                    continue;
+                }
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            if (in_double_quoted) {
+                result += c;
+                result += next;
+                ++i;
+                continue;
+            }
+
+            result += c;
+            continue;
+        }
+
+        if (c == '"') {
+            if (in_single_quoted) {
+                result += "\\\"";
+            } else {
+                in_double_quoted = !in_double_quoted;
+                result += c;
+            }
+        } else if (c == '\'') {
+            if (in_double_quoted) {
+                result += c;
+            } else if (in_single_quoted) {
+                in_single_quoted = false;
+                result += '"';
+            } else {
+                in_single_quoted = true;
+                result += '"';
+            }
+        } else {
+            result += c;
+        }
+    }
+
+    return result;
+}
+
+static void test_normalize_quotes_to_json(testing & t) {
+    t.test("basic single to double quotes", [](testing & t) {
+        std::string input = "{'key': 'value'}";
+        std::string expected = "{\"key\": \"value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("basic conversion", expected, result);
+    });
+
+    t.test("escaped single quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'code': 'print(\\'hello\\')'}";
+        std::string expected = "{\"code\": \"print('hello')\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("escaped single quote", expected, result);
+    });
+
+    t.test("double quote inside single-quoted string", [](testing & t) {
+        std::string input = "{'msg': 'He said \"hi\"'}";
+        std::string expected = "{\"msg\": \"He said \\\"hi\\\"\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("double quote escaping", expected, result);
+    });
+
+    t.test("nested backslash escapes", [](testing & t) {
+        std::string input = "{'path': 'C:\\\\Users\\\\test'}";
+        std::string expected = "{\"path\": \"C:\\\\Users\\\\test\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("backslash escaping", expected, result);
+    });
+
+    t.test("newline escapes", [](testing & t) {
+        std::string input = "{'text': 'line1\\nline2'}";
+        std::string expected = "{\"text\": \"line1\\nline2\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("newline escaping", expected, result);
+    });
+
+    t.test("mixed quotes", [](testing & t) {
+        std::string input = "{\"already_double\": 'single_value'}";
+        std::string expected = "{\"already_double\": \"single_value\"}";
+        std::string result = normalize_quotes_to_json(input);
+        t.assert_equal("mixed quotes", expected, result);
+    });
+
+    t.test("embedded quotes - the test case", test_normalize_quotes_with_embedded_quotes);
+}
+
+// Test case that mirrors the Seed-OSS failing test scenario
+static void test_normalize_quotes_with_embedded_quotes(testing & t) {
+    // This is similar to the Seed-OSS template test case
+    // The input has embedded double quotes like "14" and "bar" inside string values
+    std::string input = "{'filename': 'foo.cpp', 'oldString': 'def foo(arg = \"14\"):\\n    return arg + \"bar\"\\n', 'newString': 'def foo(arg = \"15\"):\\n    pass\\n'}";
+
+    // Expected: Python single quotes -> JSON double quotes, internal double quotes escaped
+    std::string expected = "{\"filename\": \"foo.cpp\", \"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", \"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}";
+
+    std::string result = normalize_quotes_to_json(input);
+
+    t.assert_equal("normalize quotes with embedded double quotes", expected, result);
+
+    // Also verify the result is valid JSON
+    try {
+        json parsed = json::parse(result);
+        t.assert_true("result is valid JSON", true);
+        t.assert_equal("filename field", "foo.cpp", parsed["filename"].get<std::string>());
+        t.assert_true("oldString contains embedded quotes",
+            parsed["oldString"].get<std::string>().find("\"14\"") != std::string::npos);
+        t.assert_true("newString contains embedded quotes",
+            parsed["newString"].get<std::string>().find("\"15\"") != std::string::npos);
+    } catch (const std::exception & e) {
+        t.assert_true(std::string("JSON parse failed: ") + e.what(), false);
+    }
+}
+
+// ============================================================================
+// TAG_WITH_TAGGED Argument Parsing Tests
+// ============================================================================
+
+// Build tools definition for edit function
+static json build_edit_tool() {
+    json parameters_schema = json::object();
+    parameters_schema["type"] = "object";
+    parameters_schema["properties"] = json::object();
+    parameters_schema["properties"]["filename"] = json::object({
+        {"type", "string"},
+        {"description", "Path of file to edit"}
+    });
+    parameters_schema["properties"]["oldString"] = json::object({
+        {"type", "string"},
+        {"description", "String to replace"}
+    });
+    parameters_schema["properties"]["newString"] = json::object({
+        {"type", "string"},
+        {"description", "New (replacement) value"}
+    });
+    parameters_schema["required"] = json::array({"filename", "oldString", "newString"});
+
+    return json::array({
+        json{
+            {"type", "function"},
+            {"function", json{
+                {"name", "edit"},
+                {"description", "Edit a file"},
+                {"parameters", parameters_schema}
+            }}
+        }
+    });
+}
+
+// Test that reproduces the Seed-OSS template issue with embedded quotes
+static void test_tagged_args_with_embedded_quotes(testing & t) {
+    json tools = build_edit_tool();
+
+    // Build a parser for TAG_WITH_TAGGED format like Seed-OSS/Nemotron
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // Build tool choice for the edit function
+        auto tool_choice = p.choice();
+
+        for (const auto & tool_def : tools) {
+            if (!tool_def.contains("function")) { continue; }
+            const auto & function = tool_def.at("function");
+            std::string name = function.at("name");
+            const auto & params = function.at("parameters");
+
+            if (!params.contains("properties") || !params.at("properties").is_object()) { continue; }
+
+            const auto & properties = params.at("properties");
+
+            // Build argument parsers
+            std::vector<common_peg_parser> arg_parsers;
+            for (const auto & [param_name, param_schema] : properties.items()) {
+                auto arg = p.tool_arg(
+                    p.tool_arg_open(p.literal("<parameter=") + p.tool_arg_name(p.literal(param_name)) + p.literal(">")) +
+                    p.space() +
+                    p.tool_arg_string_value(p.until("</parameter>")) +
+                    p.space() +
+                    p.tool_arg_close(p.literal("</parameter>"))
+                );
+                arg_parsers.push_back(p.optional(p.rule("arg-" + param_name, arg)));
+            }
+
+            // Build arg sequence with space() between
+            common_peg_parser args_seq = p.eps();
+            for (size_t i = 0; i < arg_parsers.size(); i++) {
+                if (i > 0) {
+                    args_seq = args_seq + p.space();
+                }
+                args_seq = args_seq + arg_parsers[i];
+            }
+
+            auto func_parser =
+                p.tool_open(p.literal("<function=") + p.tool_name(p.literal(name)) + p.literal(">")) +
+                p.space() + args_seq + p.space() +
+                p.tool_close(p.literal("</function>"));
+
+            tool_choice |= p.rule("tool-" + name, p.tool(func_parser));
+        }
+
+        auto tool_section =
+            p.literal("<seed:tool_call>") + p.space() +
+            tool_choice +
+            p.space() + p.literal("</seed:tool_call>");
+
+        return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
+    });
+
+    // The exact input from the failing test
+    std::string input =
+        "<seed:tool_call>\n"
+        "<function=edit>\n"
+        "<parameter=filename>\n"
+        "foo.cpp\n"
+        "</parameter>\n"
+        "<parameter=oldString>"
+        "def foo(arg = \"14\"):\n"
+        "    return arg + \"bar\"\n"
+        "\n"
+        "</parameter>\n"
+        "<parameter=newString>"
+        "def foo(arg = \"15\"):\n"
+        "    pass\n"
+        "\n"
+        "</parameter>\n"
+        "</function>\n"
+        "</seed:tool_call>";
+
+    common_peg_parse_context ctx(input);
+    auto result = parser.parse(ctx);
+
+    if (!t.assert_true("parse success", result.success())) {
+        return;
+    }
+
+    common_chat_msg msg;
+    auto mapper = common_chat_peg_mapper(msg);
+    mapper.from_ast(ctx.ast, result);
+
+    t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+
+    if (!msg.tool_calls.empty()) {
+        t.assert_equal("tool name", "edit", msg.tool_calls[0].name);
+
+        // Parse the arguments as JSON to verify they're valid
+        std::string args = msg.tool_calls[0].arguments;
+
+        try {
+            json parsed = json::parse(args);
+            t.assert_true("arguments is valid JSON", true);
+
+            // Verify each field has proper value
+            t.assert_equal("filename", "foo.cpp", parsed.value("filename", ""));
+
+            std::string oldString = parsed.value("oldString", "");
+            t.assert_true("oldString contains embedded quotes",
+                oldString.find("\"14\"") != std::string::npos);
+            t.assert_true("oldString contains bar with quotes",
+                oldString.find("\"bar\"") != std::string::npos);
+
+            std::string newString = parsed.value("newString", "");
+            t.assert_true("newString contains embedded quotes",
+                newString.find("\"15\"") != std::string::npos);
+
+        } catch (const std::exception & e) {
+            t.assert_true(std::string("arguments should be valid JSON: ") + e.what(), false);
+        }
+    }
+}
+
diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp
deleted file mode 100644
index bc5ba20786..0000000000
--- a/tests/test-chat-parser.cpp
+++ /dev/null
@@ -1,632 +0,0 @@
-//  Tests chat handling, including grammar generation and parsing for tool calling, for various templates.
-//
-//  Also acts as a CLI to generate a Markdown summary of the formats of Jinja templates,
-//  e.g. given Minja (http://github.com/google/minja) checked out in parent dir:
-//
-//    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
-//
-#include <exception>
-#include <iostream>
-#include <nlohmann/json.hpp>
-#include <string>
-
-#include "chat-parser.h"
-#include "common.h"
-#include "log.h"
-#include "regex-partial.h"
-
-using json = nlohmann::ordered_json;
-
-template <class T>
-static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
-    if (expected != actual) {
-        std::cerr << label << std::endl;
-        std::cerr << "Expected: " << expected << std::endl;
-        std::cerr << "Actual: " << actual << std::endl;
-        std::cerr << std::flush;
-        throw std::runtime_error("Test failed");
-    }
-}
-
-template <class T>
-static void assert_equals(const T & expected, const T & actual) {
-    assert_equals("", expected, actual);
-}
-static void assert_equals(const char * expected, const std::string & actual) {
-  return assert_equals<std::string>(expected, actual);
-}
-
-static void assert_throws(const std::function<void()> & fn, const std::string & expected_exception_pattern = "") {
-    try {
-        fn();
-    } catch (const std::exception & e) {
-      if (expected_exception_pattern.empty()) {
-          return;
-        }
-        std::regex expected_exception_regex(expected_exception_pattern);
-        std::string actual_message = e.what();
-        if (std::regex_search(actual_message, expected_exception_regex)) {
-            return;
-        }
-        throw std::runtime_error("Exception doesn't match expected pattern: " + actual_message + " (pattern: " + expected_exception_pattern + ")");
-        throw std::runtime_error("Exception of unexpected type: " + std::string(e.what()));
-    }
-    throw std::runtime_error("Exception was expected but not thrown");
-}
-
-static void test_reasoning() {
-  //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-  {
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-    });
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<tnk>Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-    });
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-    });
-    assert_equals(false, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("Cogito</tnk>Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ true,
-    });
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals(std::string("Cogito"), builder.result().reasoning_content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    common_chat_msg_parser builder("Cogito</tnk>Ergo sum", /* is_partial= */ false, {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ true,
-        /* .thinking_forced_open = */ true,
-    });
-    assert_equals(true, builder.try_parse_reasoning("<tnk>", "</tnk>"));
-    assert_equals("<think>Cogito</think>", builder.result().content);
-    assert_equals("Ergo sum", builder.consume_rest());
-  }
-  {
-    const std::string variant("content_only_inline_think");
-    common_chat_syntax syntax = {
-        /* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-        /* .parse_tool_calls = */ false,
-    };
-    const std::string input = "<think>Pense</think>Bonjour";
-    auto msg = common_chat_parse(input, false, syntax);
-    assert_equals(variant, std::string("Pense"), msg.reasoning_content);
-    assert_equals(variant, std::string("Bonjour"), msg.content);
-  }
-  {
-    const std::string variant("llama_3_inline_think");
-    common_chat_syntax syntax = {
-        /* .format = */ COMMON_CHAT_FORMAT_LLAMA_3_X,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-        /* .parse_tool_calls = */ false,
-    };
-    const std::string input = "<think>Plan</think>Réponse";
-    auto msg = common_chat_parse(input, false, syntax);
-    assert_equals(variant, std::string("Plan"), msg.reasoning_content);
-    assert_equals(variant, std::string("Réponse"), msg.content);
-  }
-  // Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_syntax syntax = {
-        /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ true,
-        /* .parse_tool_calls = */ true,
-    };
-    const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
-    common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
-    assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
-    assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
-    assert_equals(variant, std::string("ok"), builder.consume_rest());
-  }
-  // Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
-  {
-    common_chat_syntax syntax = {
-        /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ true,
-        /* .parse_tool_calls = */ true,
-    };
-    const std::string variant("deepseek_v3_1_reasoning_format_none");
-    const std::string input = "REASONING</think>ok";
-    auto msg = common_chat_parse(input, false, syntax);
-    assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-  }
-}
-
-static void test_regex() {
-  auto test_throws = [](const std::string & input, const std::string & regex, const std::string & expected_exception_pattern = "") {
-    common_chat_msg_parser builder(input, /* is_partial= */ false, {});
-    assert_throws([&]() { builder.consume_regex(common_regex(regex)); }, expected_exception_pattern);
-  };
-
-  test_throws("Hello, world!", "abc", "^abc$");
-  test_throws("Hello, world!", "e", "^e$");
-
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    builder.consume_regex(common_regex("Hello"));
-    assert_equals(", world!", builder.consume_rest());
-  }
-
-  {
-    // When in non partial mode, we can say whether the regex was consumed or not.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("Hello, world!")).has_value());
-  }
-  {
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ false, {});
-    auto res = builder.try_consume_regex(common_regex("H(el)l(?:o, world!)?"));
-    assert_equals(true, res.has_value());
-    // Verify captures
-    assert_equals<size_t>(2, res->groups.size());
-    assert_equals("Hell", builder.str(res->groups[0]));
-    assert_equals("el", builder.str(res->groups[1]));
-    // Verify position is after the match
-    assert_equals<size_t>(4, builder.pos());
-    assert_equals("o,", builder.consume_rest());
-  }
-  {
-    // But in partial mode, we have a partial final match / can't decide, so we throw a partial exception.
-    common_chat_msg_parser builder("Hello,", /* is_partial= */ true, {});
-    assert_throws([&]() {
-      builder.try_consume_regex(common_regex("Hello, world!"));
-    }, "^Hello, world!$");
-  }
-
-  // Now regardless of the mode, we can tell these aren't a match.
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_regex(common_regex("a(b|c)(d|e)f")).has_value());
-  }
-  for (const auto is_partial : {false, true}) {
-    common_chat_msg_parser builder("Hello,", is_partial, {});
-    assert_equals(false, builder.try_consume_literal("Oh"));
-  }
-}
-
-const std::vector<std::string> barely_healable_jsons = {
-  "{",
-  "{\"",
-  "{\"\\",
-  "{\"n",
-  "{\"name\"",
-  "{\"name\":",
-  "{\"name\":\"",
-  "{\"name\":\"\\",
-  "{\"name\":\"python",
-  "{\"name\":\"python\\",
-  "{\",",
-  "{\":",
-  "{\"[",
-  "{\"]",
-  "{\"{",
-  "{\"}",
-  "{\"1",
-  "{\"name\":\",",
-  "{\"name\":\":",
-  "{\"name\":\"[",
-  "{\"name\":\"]",
-  "{\"name\":\"{",
-  "{\"name\":\"}",
-  "{\"name\":\"1",
-};
-
-static void test(const std::string & input, bool is_partial, const std::vector<std::vector<std::string>> & args_paths, const std::vector<std::vector<std::string>> & content_paths, const std::string & expected) {
-  common_chat_msg_parser builder(input, is_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args(args_paths, content_paths);
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
-}
-
-static void test_deepseek_v3_1_tool_calls() {
-    //common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
-    // variant: happy path for when it works as the model card says it should
-    const std::string variant("simple");
-    common_chat_syntax syntax = {
-        /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-        /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-        /* .reasoning_in_content = */ false,
-        /* .thinking_forced_open = */ false,
-        /* .parse_tool_calls = */ true,
-    };
-    const std::string input = "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-    auto msg = common_chat_parse(input, false, syntax);
-    assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
-    assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
-    // JSON arguments are dumped without spaces
-    assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
-    assert_equals(variant, std::string(""), msg.content);
-    assert_equals(variant, std::string(""), msg.reasoning_content);
-
-    // variant: simple + thinking open
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ true,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("simple_thinking");
-        const std::string in = "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: simple + multiple tool calls
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ false,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("simple_multiple_tool_calls");
-        const std::string in = "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
-        assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-
-
-    // variant: thinking forced open + tool call in reasoning content
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ true,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING"), m.reasoning_content);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-    //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-    //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-    //          add the reasoning content as regular content and parse the tool calls.
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ true,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals(variant, std::string("REASONING"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-        assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
-        assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
-        assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
-    }
-
-    // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ true,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
-        const std::string in = "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>";
-        auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
-        assert_equals(variant, std::string("REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>"), m.reasoning_content);
-        assert_equals(variant, std::string(""), m.content);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-    }
-
-    // variant: thinking not forced open + reasoning + regular content + no tool calls
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ true,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
-        const std::string in = "REASONING</think>CONTENT";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string("REASONING"), m.reasoning_content);
-    }
-    // variant: thinking not forced open + missing reasoning + no tool calls
-    {
-        common_chat_syntax syntax = {
-            /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-            /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-            /* .reasoning_in_content = */ false,
-            /* .thinking_forced_open = */ false,
-            /* .parse_tool_calls = */ true,
-        };
-        const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
-        const std::string in = "CONTENT";
-        auto m = common_chat_parse(in, false, syntax);
-        assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
-        assert_equals(variant, std::string("CONTENT"), m.content);
-        assert_equals(variant, std::string(""), m.reasoning_content);
-    }
-}
-
-static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
-  common_chat_msg_parser builder(input, parse_as_partial, {});
-  auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
-  assert_equals(true, js.has_value());
-  assert_equals(is_partial, js->is_partial);
-  assert_equals(expected, js->value.dump());
-}
-
-static void test_json_with_dumped_args_no_args() {
-  // Normal JSON, nothing to heal, nothing to dump
-  test("{\"name\": \"python\"}", false, {}, {}, "{\"name\":\"python\"}");
-  // Full json is args
-  test("{\"name\": \"python\"}", false, {{}}, {}, "{\"name\":\"python\"}");
-
-  // If the arguments are further down, don't heal partial content.
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{"arguments"}}, {}, "{}");
-  }
-  // But heal content that isn't partial.
-  test("{\"name\": \"python\"", true, {{"arguments"}}, {}, "{\"name\":\"python\"}");
-}
-
-static void test_json_with_dumped_args() {
-
-  // Partial content.
-  test("{\"content\": \"t", true, {}, {{"content"}}, "{\"content\":\"t\"}");
-  test("{\"content\": \"", true, {}, {{"content"}}, "{\"content\":\"\"}");
-  test("{\"content\": ", true, {}, {{"content"}}, "{}");
-
-  // If the entire JSON is the arguments, healing it them dumping it produces the same output as the input (just reformatted).
-  test("{\"name\": \"python", true, {{}}, {}, "{\"name\":\"python");
-  for (const auto & src : barely_healable_jsons) {
-    test(src, true, {{}}, {}, src);
-  }
-
-  // Full JSON w/ args
-  for (auto parse_as_partial : {true, false}) {
-    test_with_args(
-      R"({"name": "python", "args": {"arg1": 1}})",
-      R"({"name":"python","args":"{\"arg1\":1}"})",
-      parse_as_partial,
-      /* is_partial= */ false
-    );
-  }
-
-  // Partial JSON w/ partial args
-  test_with_args(
-    R"({"foo": "bar", "args": {")",
-    R"({"foo":"bar","args":"{\""})"
-  );
-  // Partial args broken in object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"ar)",
-    R"({"foo":"bar","args":"{\"ar"})"
-  );
-  // Partial args broken after object key
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1")",
-    R"({"foo":"bar","args":"{\"arg1\""})"
-  );
-  // Partial args broken before object value
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1":)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken before object value (space)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": )",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that may not be complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1)",
-    R"({"foo":"bar","args":"{\"arg1\":"})"
-  );
-  // Partial args broken in object value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": 1 )",
-    R"({"foo":"bar","args":"{\"arg1\":1"})"
-  );
-  // Partial args broken in object value that is incomplete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": ")",
-    R"({"foo":"bar","args":"{\"arg1\":\""})"
-  );
-  // Partial args broken in object value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "1")",
-    R"({"foo":"bar","args":"{\"arg1\":\"1\""})"
-  );
-  // Partial args broken on array opening
-  test_with_args(
-    R"({"foo": "bar", "args": [)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is incomplete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1)",
-    R"({"foo":"bar","args":"["})"
-  );
-  // Partial args broken on array value that is complete (int)
-  test_with_args(
-    R"({"foo": "bar", "args": [1 )",
-    R"({"foo":"bar","args":"[1"})"
-  );
-  // Partial args broken on array value that is complete (string)
-  test_with_args(
-    R"({"foo": "bar", "args": ["1")",
-    R"({"foo":"bar","args":"[\"1\""})"
-  );
-  // Partial args broken after array value
-  test_with_args(
-    R"({"foo": "bar", "args": [1,)",
-    R"({"foo":"bar","args":"[1,"})"
-  );
-  // Partial args broken on nested array
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": [)",
-    R"({"foo":"bar","args":"{\"arg1\":["})"
-  );
-
-  // Unicode tests
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u00"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\u0000)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\u0000"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud8)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud8"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud80)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud80"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\u)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\u"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\ud)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\ud"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc0)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc0"})"
-  );
-  test_with_args(
-    R"({"foo": "bar", "args": {"arg1": "\ud800\udc00)",
-    R"({"foo":"bar","args":"{\"arg1\":\"\\ud800\\udc00"})"
-  );
-}
-
-static void test_positions() {
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ false, {});
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_to(100); });
-    assert_equals<size_t>(0, builder.pos());
-    assert_throws([&]() { builder.move_back(1); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(8);
-    assert_equals<size_t>(8, builder.pos());
-    builder.move_back(1);
-    assert_equals<size_t>(7, builder.pos());
-    assert_equals("world!", builder.consume_rest());
-
-    builder.move_to(0);
-    assert_equals<size_t>(0, builder.pos());
-
-    assert_throws([&]() { builder.finish(); });
-    assert_equals<size_t>(0, builder.pos());
-
-    builder.move_to(builder.input().size());
-    builder.finish();
-  }
-  {
-    common_chat_msg_parser builder("Hello, world!", /* is_partial= */ true, {});
-
-    builder.move_to(builder.input().size());
-    assert_equals<size_t>(builder.input().size(), builder.pos());
-    builder.finish();
-  }
-}
-
-int main() {
-    test_positions();
-    test_json_with_dumped_args_no_args();
-    test_json_with_dumped_args();
-    test_reasoning();
-    test_regex();
-    test_deepseek_v3_1_tool_calls();
-    std::cout << "All tests passed!\n";
-    return 0;
-}
diff --git a/tests/test-chat-peg-parser.cpp b/tests/test-chat-peg-parser.cpp
index d3a4cfd226..908b13fd0c 100644
--- a/tests/test-chat-peg-parser.cpp
+++ b/tests/test-chat-peg-parser.cpp
@@ -1,8 +1,3 @@
-#include <string>
-#include <iostream>
-#include <numeric>
-
-#include "chat-parser.h"
 #include "chat-peg-parser.h"
 #include "chat.h"
 #include "common.h"
@@ -10,6 +5,11 @@
 #include "peg-parser.h"
 #include "testing.h"
 #include "peg-parser/simple-tokenize.h"
+
+#include <iostream>
+#include <numeric>
+#include <string>
+
 #include "nlohmann/json.hpp"
 
 using json = nlohmann::ordered_json;
@@ -17,9 +17,12 @@ using json = nlohmann::ordered_json;
 static json create_tools();
 static void test_example_native(testing & t);
 static void test_example_qwen3_coder(testing & t);
+static void test_example_qwen3_non_coder(testing & t);
 static void test_command7_parser_compare(testing & t);
+static void test_prefix_tool_names(testing & t);
+static void test_tagged_peg_parser(testing & t);
 
-int main(int argc, char *argv[]) {
+int main(int argc, char * argv[]) {
     testing t(std::cout);
     if (argc >= 2) {
         t.set_filter(argv[1]);
@@ -32,7 +35,10 @@ int main(int argc, char *argv[]) {
 
     t.test("native", test_example_native);
     t.test("qwen3 coder", test_example_qwen3_coder);
+    t.test("qwen3 non-coder", test_example_qwen3_non_coder);
     t.test("comparison", test_command7_parser_compare);
+    t.test("prefix tool names", test_prefix_tool_names);
+    t.test("tagged peg parser", test_tagged_peg_parser);
 
     return t.summary();
 }
@@ -41,87 +47,75 @@ static json create_tools() {
     json tools = json::array();
 
     json tool_weather = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_current_weather"},
-            {"description", "Get the current weather in a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_current_weather" },
+              { "description", "Get the current weather in a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description",
+                              "The temperature unit to use. Infer this from the users location." } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_weather);
 
     json tool_forecast = {
-        {"type", "function"},
-        {"function", {
-            {"name", "get_forecast"},
-            {"description", "Get the weather forecast for a given location"},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"location", {
-                        {"type", "string"},
-                        {"description", "The city and state, e.g. San Francisco, CA"}
-                    }},
-                    {"unit", {
-                        {"type", "string"},
-                        {"enum", {"celsius", "fahrenheit"}},
-                        {"description", "The temperature unit to use. Infer this from the users location."}
-                    }},
-                    {"days", {
-                        {"type", "integer"},
-                        {"description", "Number of days to forecast (1-10)"},
-                        {"minimum", 1},
-                        {"maximum", 10}
-                    }}
-                }},
-                {"required", {"location", "unit"}},
-            }},
-        }}
+        { "type",     "function" },
+        { "function",
+         {
+              { "name", "get_forecast" },
+              { "description", "Get the weather forecast for a given location" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      { { "location",
+                          { { "type", "string" }, { "description", "The city and state, e.g. San Francisco, CA" } } },
+                        { "unit",
+                          { { "type", "string" },
+                            { "enum", { "celsius", "fahrenheit" } },
+                            { "description", "The temperature unit to use. Infer this from the users location." } } },
+                        { "days",
+                          { { "type", "integer" },
+                            { "description", "Number of days to forecast (1-10)" },
+                            { "minimum", 1 },
+                            { "maximum", 10 } } } } },
+                    { "required", { "location", "unit" } },
+                } },
+          }                      }
     };
     tools.push_back(tool_forecast);
 
     json tool_search = {
-        {"type", "function"},
-        {"function", {
-            {"name", "search_knowledge_base"},
-            {"description", "Search the internal technical documentation knowledge base."},
-            {"parameters", {
-                {"type", "object"},
-                {"properties", {
-                    {"query", {
-                        {"type", "string"},
-                        {"description", "The search query string."}
-                    }},
-                    {"max_results", {
-                        {"type", "integer"},
-                        {"description", "The maximum number of results to return."},
-                        {"default", 5}
-                    }},
-                    {"category", {
-                        {"type", "string"},
-                        {"enum", {"api", "troubleshooting", "billing", "general"}},
-                        {"description", "Filter search by specific category."}
-                    }}
-                }},
-                {"required", {"query", "category"}},
-                {"additionalProperties", false}
-            }},
-            {"strict", true}
-        }}
+        { "type",     "function" },
+        { "function",
+         { { "name", "search_knowledge_base" },
+            { "description", "Search the internal technical documentation knowledge base." },
+            { "parameters",
+              { { "type", "object" },
+                { "properties",
+                  { { "query", { { "type", "string" }, { "description", "The search query string." } } },
+                    { "max_results",
+                      { { "type", "integer" },
+                        { "description", "The maximum number of results to return." },
+                        { "default", 5 } } },
+                    { "category",
+                      { { "type", "string" },
+                        { "enum", { "api", "troubleshooting", "billing", "general" } },
+                        { "description", "Filter search by specific category." } } } } },
+                { "required", { "query", "category" } },
+                { "additionalProperties", false } } },
+            { "strict", true } } }
     };
     tools.push_back(tool_search);
 
@@ -131,274 +125,238 @@ static json create_tools() {
 struct tool_argument {
     std::string name;
     std::string type;
-    bool is_required;
-    json schema;
+    bool        is_required;
+    json        schema;
 };
 
 struct tool_definition {
-    std::string name;
+    std::string                name;
     std::vector<tool_argument> arguments;
-    json schema;
+    json                       schema;
 };
 
 // Test fictitious model output that emits arguments as JSON.
 static void test_example_native(testing & t) {
     struct test_case {
         // Parameters
-        std::string name;
-        json tools;
+        std::string             name;
+        json                    tools;
         common_chat_tool_choice tool_choice;
         common_reasoning_format reasoning_format;
-        json json_schema;
-        bool parallel_tool_calls;
-        bool thinking_forced_open;
-        std::string input;
+        json                    json_schema;
+        bool                    parallel_tool_calls;
+        std::string             generation_prompt;
+        std::string             input;
 
         // Expect
-        std::string expect_reasoning;
-        std::string expect_content;
+        std::string                        expect_reasoning;
+        std::string                        expect_content;
         std::vector<common_chat_tool_call> expect_tool_calls;
     };
 
     auto build_parser = [](const test_case & tc) {
-        return build_chat_peg_native_parser([&](common_chat_peg_native_builder & p) {
+        return build_chat_peg_parser([&](common_chat_peg_builder & p) {
             auto reasoning_in_content = (tc.reasoning_format == COMMON_REASONING_FORMAT_NONE);
-            auto reasoning = p.eps();
-            if (tc.thinking_forced_open) {
-                // If thinking is forced open, expect a closing tag
-                reasoning = p.reasoning(p.until("</think>")) + "</think>" + p.space();
-            } else {
-                // Otherwise, optionally accept thinking wrapped in tags
-                reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
-            }
+            // Always use optional TAG_BASED pattern; generation_prompt is prepended to input
+            auto reasoning = p.optional("<think>" + p.reasoning(p.until("</think>")) + "</think>" + p.space());
 
             // tool calling parser
             if (tc.tools.is_array() && !tc.tools.empty()) {
-                auto tools = p.choice();
-                for (const auto & tool : tc.tools) {
-                    const auto & function = tool.at("function");
-                    std::string name = function.at("name");
-                    const auto & schema = function.at("parameters");
-
-                    auto tool_name = p.json_member("name", "\"" + p.tool_name(p.literal(name)) + "\"");
-                    auto tool_args = p.json_member("arguments", p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", schema)));
-
-                    tools |= p.rule("tool-" + name, p.tool_open(p.literal("{")) << tool_name << "," << tool_args << "}");
-                };
+                auto tool_call =
+                    p.standard_json_tools("<tool_call>[", "]</tool_call>", tc.tools, tc.parallel_tool_calls,
+                                          tc.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED);
 
-                auto parallel_calls = p.eps();
-                if (tc.parallel_tool_calls) {
-                    parallel_calls = p.zero_or_more("," << tools);
-                }
-
-                auto tool_call = p.trigger_rule("tool-call",
-                    p.sequence({
-                        p.literal("<tool_call>["),
-                        tools,
-                        parallel_calls,
-                        p.literal("]</tool_call>")
-                    })
-                );
-
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.until("<tool_call>")),
-                    p.optional(p.space() + tool_call),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.until("<tool_call>")),
+                                    p.optional(p.space() + tool_call), p.space(), p.end() });
             }
 
             // response_format parser
             if (tc.json_schema.is_object() && !tc.json_schema.empty()) {
-                return p.sequence({
-                    (reasoning_in_content ? p.eps() : reasoning),
-                    p.content(p.schema(p.json(), "response-output", tc.json_schema)),
-                    p.space(),
-                    p.end()
-                });
+                return p.sequence({ (reasoning_in_content ? p.eps() : reasoning),
+                                    p.content(p.schema(p.json(), "response-output", tc.json_schema)), p.space(),
+                                    p.end() });
             }
 
             // Content-only parser
-            return p.sequence({
-                (reasoning_in_content ? p.eps() : reasoning),
-                p.content(p.rest()),
-                p.end()
-            });
+            return p.sequence({ (reasoning_in_content ? p.eps() : reasoning), p.content(p.rest()), p.end() });
         });
     };
 
     std::vector<test_case> test_cases = std::vector<test_case>{
         {
-            /* .name =                 */ "content with thinking_forced_open = false",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with reasoning (no generation_prompt)",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "",
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and no reasoning",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ false,
-            /* .input =                */ (
-                "Hello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content without reasoning (no generation_prompt)",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "",
+         /* .input =                */ ("Hello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = false and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "<think>The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with reasoning_format = none (tags appear in content)",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "",
+         /* .input =                */ ("<think>The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "<think>The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "The user said hello, I must say hello back",
-            /* .expect_content =       */ "Hello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with reasoning generation_prompt",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "<think>",
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "The user said hello, I must say hello back",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "content with thinking_forced_open = true and reasoning_format = none",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "The user said hello, I must say hello back</think>\nHello"
-            ),
-            /* .expect_reasoning =     */ "",
-            /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "content with reasoning generation_prompt and reasoning_format = none",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_NONE,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "",
+         /* .input =                */ ("The user said hello, I must say hello back</think>\nHello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "The user said hello, I must say hello back</think>\nHello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and no parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York</think>\n"
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York",
-            /* .expect_content =       */ "",
-            /* .expect_tool_calls =    */ {{
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }},
-        },
+         /* .name =                 */ "content with closed reasoning generation_prompt (empty reasoning discarded)",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "<think></think>",
+         /* .input =                */ ("Hello"),
+         /* .expect_reasoning =     */ "",
+         /* .expect_content =       */ "Hello",
+         /* .expect_tool_calls =    */ {},
+         },
         {
-            /* .name =                 */ "tools with tool_choice = auto and parallel_tool_calls",
-            /* .tools =                */ create_tools(),
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {},
-            /* .parallel_tool_calls =  */ true,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me search that for you."
-                "<tool_call>["
-                R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
-                ", "
-                R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
-                "]</tool_call>"
-            ),
-            /* .expect_reasoning =     */ "I must get the weather in New York and San Francisco and a 3 day forecast of each.",
-            /* .expect_content =       */ "Let me search that for you.",
-            /* .expect_tool_calls =    */ {{
+         /* .name =                 */ "tools with reasoning generation_prompt",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "<think>",
+         /* .input =                */
+            ("I must get the weather in New York</think>\n"
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */ "I must get the weather in New York",
+         /* .expect_content =       */ "",
+         /* .expect_tool_calls =    */
+            { {
                 /* .name =      */ "get_current_weather",
                 /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
                 /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_current_weather",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }, {
-                /* .name =      */ "get_forecast",
-                /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
-                /* .id =        */ "",
-            }},
-        },
+            } },
+         },
         {
-            /* .name =                 */ "response_format with thinking_forced_open = true",
-            /* .tools =                */ {},
-            /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
-            /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
-            /* .json_schema =          */ {
-                {"type", "object"},
-                {"properties", {
-                    {"invoice_number", {{"type", "string"}}},
-                    {"amount", {{"type", "number"}}},
-                    {"due_date", {{"type", "string"}}}
-                }},
-                {"required", {"invoice_number", "amount", "due_date"}}
-            },
-            /* .parallel_tool_calls =  */ false,
-            /* .thinking_forced_open = */ true,
-            /* .input =                */ (
-                "I must produce the invoice in the requested format</think>\n"
-                R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"
-            ),
-            /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
-            /* .expect_content =       */ R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})",
-            /* .expect_tool_calls =    */ {},
-        },
+         /* .name =                 */ "parallel tools with reasoning generation_prompt",
+         /* .tools =                */ create_tools(),
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_AUTO,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */ {},
+         /* .parallel_tool_calls =  */ true,
+         /* .generation_prompt =    */ "<think>",
+         /* .input =                */
+            ("I must get the weather in New York and San Francisco and a 3 day forecast of each.</think>\nLet me "
+             "search that for you."
+             "<tool_call>["
+             R"({"name": "get_current_weather", "arguments": {"location": "New York City, NY", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_current_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "New York City, NY", "unit": "fahrenheit", "days": 3}})"
+             ", "
+             R"({"name": "get_forecast", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3}})"
+             "]</tool_call>"),
+         /* .expect_reasoning =     */
+            "I must get the weather in New York and San Francisco and a 3 day forecast of each.",                                                                     /* .expect_content =       */ "Let me search that for you.",
+         /* .expect_tool_calls =    */
+            { {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_current_weather",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit"})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "New York City, NY", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              },
+              {
+                  /* .name =      */ "get_forecast",
+                  /* .arguments = */ R"({"location": "San Francisco, CA", "unit": "fahrenheit", "days": 3})",
+                  /* .id =        */ "",
+              } },
+         },
+        {
+         /* .name =                 */ "response_format with reasoning generation_prompt",
+         /* .tools =                */ {},
+         /* .tool_choice =          */ COMMON_CHAT_TOOL_CHOICE_NONE,
+         /* .reasoning_format =     */ COMMON_REASONING_FORMAT_AUTO,
+         /* .json_schema =          */
+            { { "type", "object" },
+              { "properties",
+                { { "invoice_number", { { "type", "string" } } },
+                  { "amount", { { "type", "number" } } },
+                  { "due_date", { { "type", "string" } } } } },
+              { "required", { "invoice_number", "amount", "due_date" } } },
+         /* .parallel_tool_calls =  */ false,
+         /* .generation_prompt =    */ "<think>",
+         /* .input =                */
+            ("I must produce the invoice in the requested format</think>\n"
+             R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})"),
+         /* .expect_reasoning =     */ "I must produce the invoice in the requested format",
+         /* .expect_content =       */
+            R"({"invoice_number": "INV-2025-001", "amount": 1250.50, "due_date": "2025-12-31"})", /* .expect_tool_calls =    */ {},
+         },
     };
 
     for (const auto & tc : test_cases) {
         t.test(tc.name, [&](testing & t) {
-            auto parser = build_parser(tc);
-            auto lazy = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+            auto parser  = build_parser(tc);
+            auto lazy    = !tc.tools.empty() && tc.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
             auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-                for (auto const & def : tc.tools) {
-                    auto function = def.at("function");
+                for (const auto & def : tc.tools) {
+                    auto function   = def.at("function");
                     auto parameters = function.at("parameters");
                     builder.resolve_refs(parameters);
                 };
@@ -406,17 +364,18 @@ static void test_example_native(testing & t) {
             });
 
             t.log("Grammar:");
-            for (auto const & line : string_split(grammar, "\n")) {
+            for (const auto & line : string_split(grammar, "\n")) {
                 t.log(line);
             }
 
-            common_peg_parse_context ctx(tc.input, false);
-            auto result = parser.parse(ctx);
+            std::string              effective_input = tc.generation_prompt + tc.input;
+            common_peg_parse_context ctx(effective_input);
+            auto                     result = parser.parse(ctx);
 
             t.assert_true("success", result.success());
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_native_mapper(msg);
+            auto            mapper = common_chat_peg_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             t.assert_equal("content equal", tc.expect_content, msg.content);
@@ -431,16 +390,16 @@ static void test_example_native(testing & t) {
 }
 
 static void test_example_qwen3_coder(testing & t) {
-    auto tools = create_tools();
-    auto parser = build_chat_peg_constructed_parser([&](common_chat_peg_constructed_builder & p) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
         auto content = p.rule("content", p.content(p.until("<tool_call>")));
 
         std::vector<common_peg_parser> tool_parsers;
-        for (auto const & def : tools) {
-            auto function = def.at("function");
-            std::string name = function.at("name");
-            auto parameters = function.at("parameters");
-            auto properties = parameters.at("properties");
+        for (const auto & def : tools) {
+            auto        function   = def.at("function");
+            std::string name       = function.at("name");
+            auto        parameters = function.at("parameters");
+            auto        properties = parameters.at("properties");
 
             std::set<std::string> required_properties;
             if (function.contains("required")) {
@@ -450,59 +409,36 @@ static void test_example_qwen3_coder(testing & t) {
             std::vector<common_peg_parser> arg_parsers;
             for (const auto & [param_name, param_schema] : properties.items()) {
                 bool is_required = required_properties.find(param_name) != required_properties.end();
-                auto type = param_schema.value("type", "object");
-
-                auto arg = p.tool_arg(p.sequence({
-                    p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
-                    (type == "string" ?
-                        p.tool_arg_string_value(
-                            p.schema(
-                                p.until_one_of({
-                                    "</parameter>\n<parameter=",
-                                    "</parameter>\n</function>"
-                                }),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema,
-                                true
-                            )
-                        ) : p.tool_arg_json_value(
-                            p.schema(
-                                p.json(),
-                                "tool-" + name + "-arg-" + param_name + "-schema",
-                                param_schema
-                            )
-                        )
-                    ),
-                    p.tool_arg_close(
-                        "</parameter>\n" +
-                        p.peek(p.literal("<parameter=") | p.literal("</function>"))
-                    )
-                }));
-
-                arg_parsers.push_back(is_required ?
-                    p.rule("tool-" + name + "-arg-" + param_name, arg) :
-                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
+                auto type        = param_schema.value("type", "object");
+
+                auto arg = p.tool_arg(
+                    p.sequence({ p.tool_arg_open("<parameter=" + p.tool_arg_name(p.literal(param_name)) + ">"),
+                                 (type == "string" ?
+                                      p.tool_arg_string_value(p.schema(
+                                          p.until_one_of({ "</parameter>\n<parameter=", "</parameter>\n</function>" }),
+                                          "tool-" + name + "-arg-" + param_name + "-schema", param_schema, true)) :
+                                      p.tool_arg_json_value(p.schema(
+                                          p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema))),
+                                 p.tool_arg_close("</parameter>\n" +
+                                                  p.peek(p.literal("<parameter=") | p.literal("</function>"))) }));
+
+                arg_parsers.push_back(is_required ? p.rule("tool-" + name + "-arg-" + param_name, arg) :
+                                                    p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
             }
 
-            tool_parsers.push_back(p.rule("tool-" + name,
-                p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
-                << p.sequence(arg_parsers)
-                << p.tool_close(p.literal("</function>"))
-            ));
+            tool_parsers.push_back(p.rule("tool-" + name, p.tool_open("<function=" + p.tool_name(p.literal(name)) + ">")
+                                                              << p.sequence(arg_parsers)
+                                                              << p.tool_close(p.literal("</function>"))));
         };
 
-        auto tool_call = p.trigger_rule("tool-call",
-            "<tool_call>"
-            << p.choice(tool_parsers)
-            << "</tool_call>"
-        );
+        auto tool_call = p.trigger_rule("tool-call", "<tool_call>" << p.choice(tool_parsers) << "</tool_call>");
 
         return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
     auto grammar = build_grammar([&](const common_grammar_builder & builder) {
-        for (auto const & def : tools) {
-            auto function = def.at("function");
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
             auto parameters = function.at("parameters");
             builder.resolve_refs(parameters);
         };
@@ -510,11 +446,11 @@ static void test_example_qwen3_coder(testing & t) {
     });
 
     t.log("Grammar:");
-    for (auto const & line : string_split(grammar, "\n")) {
+    for (const auto & line : string_split(grammar, "\n")) {
         t.log(line);
     }
 
-    t.test("incremental parsing", [&](testing &t) {
+    t.test("incremental parsing", [&](testing & t) {
         std::string input =
             "Let me search the knowledge base for cat pictures."
             "<tool_call>\n"
@@ -530,7 +466,7 @@ static void test_example_qwen3_coder(testing & t) {
         for (auto it = tokens.begin(); it != tokens.end(); it++) {
             std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
 
-            common_peg_parse_context ctx(in, it + 1 < tokens.end());
+            common_peg_parse_context ctx(in, (it + 1 < tokens.end()) ? COMMON_PEG_PARSE_FLAG_LENIENT : COMMON_PEG_PARSE_FLAG_NONE);
 
             auto result = parser.parse(ctx);
             if (!t.assert_equal("not fail", false, result.fail())) {
@@ -538,7 +474,7 @@ static void test_example_qwen3_coder(testing & t) {
             }
 
             common_chat_msg msg;
-            auto mapper = common_chat_peg_constructed_mapper(msg);
+            auto            mapper = common_chat_peg_mapper(msg);
             mapper.from_ast(ctx.ast, result);
 
             //t.log("Input: " + input);
@@ -554,7 +490,105 @@ static void test_example_qwen3_coder(testing & t) {
             try {
                 // This shouldn't emit any runtime errors
                 auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
-            } catch(const std::exception & e) {
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("failed with ") + e.what(), false);
+            }
+
+            prev = msg;
+        }
+    });
+}
+
+static void test_example_qwen3_non_coder(testing & t) {
+    auto tools  = create_tools();
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        // tool calling parser using standard JSON format
+        auto tool_call = p.standard_json_tools("<tool_call>", "</tool_call>", tools, true, false);
+
+        return p.sequence({ p.content(p.until("<tool_call>")), p.optional(p.space() + tool_call), p.end() });
+    });
+
+    auto grammar = build_grammar([&](const common_grammar_builder & builder) {
+        for (const auto & def : tools) {
+            auto function   = def.at("function");
+            auto parameters = function.at("parameters");
+            builder.resolve_refs(parameters);
+        };
+        parser.build_grammar(builder);
+    });
+
+    t.log("Grammar:");
+    for (const auto & line : string_split(grammar, "\n")) {
+        t.log(line);
+    }
+
+    t.test("tool call parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "I need to get the weather.\n", msg.content);
+        t.assert_equal("reasoning", "", msg.reasoning_content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "get_current_weather", msg.tool_calls[0].name);
+            t.assert_equal("tool args", "{\"location\": \"New York City, NY\", \"unit\": \"fahrenheit\"}",
+                           msg.tool_calls[0].arguments);
+        }
+    });
+
+    t.test("incremental parsing", [&](testing & t) {
+        std::string input =
+            "I need to get the weather.\n"
+            "<tool_call>"
+            "{\"name\": \"get_current_weather\", \"arguments\": {\"location\": \"New York City, NY\", \"unit\": "
+            "\"fahrenheit\"}}"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, (it + 1 < tokens.end()) ? COMMON_PEG_PARSE_FLAG_LENIENT : COMMON_PEG_PARSE_FLAG_NONE);
+
+            auto result = parser.parse(ctx);
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            //t.log("Input: " + input);
+            t.log("===========================================");
+            t.log("Iteration " + std::to_string(in.size()));
+            t.log("Reasoning: " + msg.reasoning_content);
+            t.log("Content  : " + msg.content);
+            for (const auto & tc : msg.tool_calls) {
+                t.log("Tool name: " + tc.name);
+                t.log("Tool args: " + tc.arguments);
+            }
+
+            try {
+                // This shouldn't emit any runtime errors
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
                 t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
                 t.assert_true(std::string("failed with ") + e.what(), false);
             }
@@ -565,38 +599,37 @@ static void test_example_qwen3_coder(testing & t) {
 }
 
 void test_command7_parser_compare(testing & t) {
-    auto parser = build_chat_peg_native_parser([](common_chat_peg_native_builder & p) {
-        auto thinking = p.reasoning_block(
-            "<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
+    auto parser = build_chat_peg_parser([](common_chat_peg_builder & p) {
+        auto thinking =
+            p.reasoning_block("<|START_THINKING|>" << p.reasoning(p.until("<|END_THINKING|>")) << "<|END_THINKING|>");
 
         auto response = "<|START_RESPONSE|>" << p.content(p.until("<|END_RESPONSE|>")) << "<|END_RESPONSE|>";
 
-        auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.json_string_content()) + "\"")));
-        auto tool_call_name = p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.json_string_content()) + "\"")));
+        auto tool_call_id = p.atomic("\"tool_call_id\"" << (":" << ("\"" + p.tool_id(p.string_content('"')) + "\"")));
+        auto tool_call_name =
+            p.atomic("\"tool_name\"" << (":" << ("\"" + p.tool_name(p.string_content('"')) + "\"")));
         auto tool_call_args = "\"parameters\"" << (":" << p.tool_args(p.json()));
 
         auto tool_call_fields = p.rule("tool-call-fields", tool_call_id | tool_call_name | tool_call_args);
-        auto tool_call = p.rule("tool-call", p.tool(
-            p.tool_open(p.literal("{"))
-            << tool_call_fields
-            << p.zero_or_more( p.literal(",") << tool_call_fields)
-            << p.tool_close(p.literal("}"))
-        ));
-
-        auto tool_calls = p.rule("tool-calls",
-            "<|START_ACTION|>"
-            << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
-            << "<|END_ACTION|>");
+        auto tool_call =
+            p.rule("tool-call", p.tool(p.tool_open(p.literal("{"))
+                                       << tool_call_fields << p.zero_or_more(p.literal(",") << tool_call_fields)
+                                       << p.tool_close(p.literal("}"))));
+
+        auto tool_calls = p.rule(
+            "tool-calls", "<|START_ACTION|>" << ("[" << tool_call << p.zero_or_more(p.literal(",") << tool_call) << "]")
+                                             << "<|END_ACTION|>");
 
         return p.optional(thinking) << (tool_calls | response) + p.end();
     });
 
-    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial, bool print_results) {
-        common_peg_parse_context ctx(input, is_partial);
-        auto result = p.parse(ctx);
+    auto test_current = [&](const common_peg_arena & p, const std::string & input, bool is_partial,
+                            bool print_results) {
+        common_peg_parse_context ctx(input, is_partial ? COMMON_PEG_PARSE_FLAG_LENIENT : COMMON_PEG_PARSE_FLAG_NONE);
+        auto                     result = p.parse(ctx);
 
         common_chat_msg msg;
-        auto mapper = common_chat_peg_native_mapper(msg);
+        auto            mapper = common_chat_peg_mapper(msg);
         mapper.from_ast(ctx.ast, result);
 
         if (print_results) {
@@ -614,79 +647,19 @@ void test_command7_parser_compare(testing & t) {
         }
     };
 
-    auto test_legacy = [&](const std::string & input, bool need_more_input, bool print_results) {
-        // Original common_chat_combinator_parser taken from chat.cpp
-        common_chat_msg_parser builder(
-            input,
-            /* .is_partial = */ need_more_input,
-            {
-                /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
-                /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                /* .reasoning_in_content = */ false,
-                /* .thinking_forced_open = */ false,
-            }
-        );
-
-        builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");
-
-        static const common_regex start_action_regex("<\\|START_ACTION\\|>");
-        static const common_regex end_action_regex("<\\|END_ACTION\\|>");
-        static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
-        static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");
-
-        if (auto res = builder.try_find_regex(start_action_regex)) {
-            // If we didn't extract thoughts, prelude includes them.
-            auto tool_calls = builder.consume_json_with_dumped_args({ { "parameters" } });
-            for (const auto & tool_call : tool_calls.value) {
-                std::string name      = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
-                std::string id        = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
-                std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
-                if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
-                    throw common_chat_msg_partial_exception("incomplete tool call");
-                }
-            }
-            if (tool_calls.is_partial) {
-                throw common_chat_msg_partial_exception("incomplete tool call");
-            }
-            builder.consume_regex(end_action_regex);
-        } else if (auto res = builder.try_find_regex(start_response_regex)) {
-            if (!builder.try_find_regex(end_response_regex)) {
-                builder.add_content(builder.consume_rest());
-                throw common_chat_msg_partial_exception(end_response_regex.str());
-            }
-        } else {
-            builder.add_content(builder.consume_rest());
-        }
-
-        if (print_results) {
-            std::cout << "== Parsed (legacy) ==\n";
-            std::cout << "=== Reasoning ===\n";
-            std::cout << builder.result().reasoning_content << "\n";
-            std::cout << "\n\n=== Content ===\n";
-            std::cout << builder.result().content << "\n";
-            std::cout << "\n\n=== Tool Calls ===\n";
-            for (const auto & tc : builder.result().tool_calls) {
-                std::cout << "id: " << tc.id << "\n";
-                std::cout << "name: " << tc.name << "\n";
-                std::cout << "args: " << tc.arguments << "\n";
-            }
-        }
-    };
-
-    std::string reasoning = "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
-            "budget of $4000 for a two-week stay, we need to:\n\n"
-            "1. Identify key historical sites and modern attractions in Japan.\n"
-            "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
-            "3. Determine the best modes of transportation for getting around Japan.\n"
-            "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
-            "overspending.\n"
-            "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
-            "to attractions.";
-
-    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {{
-        "call_0",
-        "plan_trip",
-        nlohmann::json::parse(R"({
+    std::string reasoning =
+        "To plan an effective trip to Japan that includes both historical sites and modern attractions within a "
+        "budget of $4000 for a two-week stay, we need to:\n\n"
+        "1. Identify key historical sites and modern attractions in Japan.\n"
+        "2. Find affordable accommodation options that provide a balance between comfort and cost.\n"
+        "3. Determine the best modes of transportation for getting around Japan.\n"
+        "4. Create a day-by-day itinerary that ensures the user gets to see a variety of attractions without "
+        "overspending.\n"
+        "5. Provide a detailed cost breakdown that includes accommodation, transportation, meals, and entry fees "
+        "to attractions.";
+
+    std::vector<std::tuple<std::string, std::string, nlohmann::json>> tool_calls = {
+        { "call_0", "plan_trip", nlohmann::json::parse(R"({
             "destination": "Japan",
             "duration": 14,
             "budget": 4000,
@@ -694,8 +667,8 @@ void test_command7_parser_compare(testing & t) {
             "accommodation_preferences": "affordable",
             "transportation_preferences": "efficient",
             "meal_preferences": "local cuisine"
-        })")
-    }};
+        })") }
+    };
 
     std::vector<std::string> tokens;
 
@@ -712,10 +685,10 @@ void test_command7_parser_compare(testing & t) {
 
         auto json = nlohmann::json::array();
         for (const auto & tc : tool_calls) {
-            auto tc_json = nlohmann::json::object();
+            auto tc_json            = nlohmann::json::object();
             tc_json["tool_call_id"] = std::get<0>(tc);
-            tc_json["tool_name"] = std::get<1>(tc);
-            tc_json["parameters"] = std::get<2>(tc);
+            tc_json["tool_name"]    = std::get<1>(tc);
+            tc_json["parameters"]   = std::get<2>(tc);
             json.push_back(tc_json);
         }
 
@@ -727,42 +700,284 @@ void test_command7_parser_compare(testing & t) {
 
     std::string input = std::accumulate(tokens.begin(), tokens.end(), std::string());
 
-    // Run tests
-    t.test("legacy_parse", [&](testing & /* t */) {
-        test_legacy(input, false, false);
-    });
+    t.test("current_parse", [&](testing & /* t */) { test_current(parser, input, false, false); });
+    t.bench("current_parse_benchmark complete", [&]() { test_current(parser, input, false, false); }, 100);
+    t.bench(
+        "current_parse_benchmark incremental",
+        [&]() {
+            std::string in;
+            for (auto i = 0u; i < tokens.size(); i++) {
+                in += tokens[i];
+                test_current(parser, in, i + 1 < tokens.size(), false);
+            }
+        },
+        20);
+}
 
-    t.test("current_parse", [&](testing & /* t */) {
-        test_current(parser, input, false, false);
+// Test that tool names that are proper prefixes of other tool names don't cause
+// premature matching during incremental parsing.
+// For example, "special_function" should not match when parsing "special_function_with_opt".
+static void test_prefix_tool_names(testing & t) {
+    // Create tools where one name is a proper prefix of another
+    json tools = json::array();
+
+    json tool_short = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function" },
+              { "description", "A special function" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_short);
+
+    json tool_long = {
+        { "type", "function" },
+        { "function",
+          {
+              { "name", "special_function_with_opt" },
+              { "description", "A special function with optional params" },
+              { "parameters",
+                {
+                    { "type", "object" },
+                    { "properties",
+                      {
+                          { "arg1", { { "type", "integer" } } },
+                          { "arg2", { { "type", "integer" } } },
+                      } },
+                    { "required", { "arg1" } },
+                } },
+          } }
+    };
+    tools.push_back(tool_long);
+
+    // Use standard_constructed_tools which had the prefix matching bug
+    std::map<std::string, std::string> markers = {
+        { "tool_call_start_marker", "<tool_call>" },
+        { "tool_call_end_marker", "</tool_call>" },
+        { "function_opener", "<function=" },
+        { "function_closer", "</function>" },
+        { "function_name_suffix", ">" },
+        { "parameter_key_prefix", "<param=" },
+        { "parameter_key_suffix", ">" },
+        { "parameter_closer", "</param>" },
+    };
+
+    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
+        auto content   = p.rule("content", p.content(p.until("<tool_call>")));
+        auto tool_call = p.standard_constructed_tools(markers, tools, false, false);
+        return content + p.zero_or_more(p.space() + tool_call) + p.end();
     });
 
-    // Run benchmarks
-    t.bench("legacy_parse_benchmark complete", [&]() {
-        test_legacy(input, false, false);
+    // Test parsing the long tool name - this should NOT trigger the short tool name
+    t.test("parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function_with_opt", msg.tool_calls[0].name);
+        }
     });
 
-    t.bench("legacy_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
+    // Test incremental parsing - the key test case
+    // This ensures that when incrementally parsing "special_function_with_opt",
+    // we don't prematurely emit "special_function" as a tool call
+    t.test("incremental parse long tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function_with_opt>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        std::vector<std::string> tokens = simple_tokenize(input);
+
+        common_chat_msg prev;
+        for (auto it = tokens.begin(); it != tokens.end(); it++) {
+            std::string in = std::accumulate(tokens.begin(), it + 1, std::string());
+
+            common_peg_parse_context ctx(in, (it + 1 < tokens.end()) ? COMMON_PEG_PARSE_FLAG_LENIENT : COMMON_PEG_PARSE_FLAG_NONE);
+            auto                     result = parser.parse(ctx);
+
+            if (!t.assert_equal("not fail", false, result.fail())) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                return;
+            }
+
+            common_chat_msg msg;
+            auto            mapper = common_chat_peg_mapper(msg);
+            mapper.from_ast(ctx.ast, result);
+
+            // The critical check: during incremental parsing, we should never
+            // see "special_function" as the tool name when parsing "special_function_with_opt"
+            for (const auto & tc : msg.tool_calls) {
+                if (!t.assert_equal("tool name should not be short prefix", false,
+                                    tc.name == "special_function")) {
+                    t.log("Premature tool name match at input: " + in);
+                    return;
+                }
+            }
 
             try {
-                test_legacy(in, i + 1 < tokens.size(), false);
-            } catch (common_chat_msg_partial_exception & /* e */) {
-                // Do nothing, this is expected
+                auto diffs = common_chat_msg_diff::compute_diffs(prev, msg);
+            } catch (const std::exception & e) {
+                t.log(in.substr(0, result.end) + "[failed->]" + in.substr(result.end));
+                t.assert_true(std::string("diff failed with ") + e.what(), false);
+                return;
             }
+
+            prev = msg;
         }
-    }, 20);
 
-    t.bench("current_parse_benchmark complete", [&]() {
-        test_current(parser, input, false, false);
-    }, 100);
+        // Final check: the complete parse should have the correct tool name
+        t.assert_equal("final tool calls count", 1u, prev.tool_calls.size());
+        if (!prev.tool_calls.empty()) {
+            t.assert_equal("final tool name", "special_function_with_opt", prev.tool_calls[0].name);
+        }
+    });
 
-    t.bench("current_parse_benchmark incremental", [&]() {
-        std::string in;
-        for (auto i = 0u; i < tokens.size(); i++) {
-            in += tokens[i];
-            test_current(parser, in, i + 1 < tokens.size(), false);
+    // Test parsing the short tool name still works
+    t.test("parse short tool name", [&](testing & t) {
+        std::string input =
+            "Let me call the function."
+            "<tool_call>"
+            "<function=special_function>"
+            "<param=arg1>42</param>"
+            "</function>"
+            "</tool_call>";
+
+        common_peg_parse_context ctx(input);
+        auto                     result = parser.parse(ctx);
+
+        t.assert_true("success", result.success());
+
+        common_chat_msg msg;
+        auto            mapper = common_chat_peg_mapper(msg);
+        mapper.from_ast(ctx.ast, result);
+
+        t.assert_equal("content", "Let me call the function.", msg.content);
+        t.assert_equal("tool calls count", 1u, msg.tool_calls.size());
+        if (!msg.tool_calls.empty()) {
+            t.assert_equal("tool name", "special_function", msg.tool_calls[0].name);
         }
-    }, 20);
+    });
+}
+
+static void test_tagged_peg_parser(testing & t) {
+    t.test("basic tag extraction", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("greeting", p.until(" ")) + " " + p.tag("name", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Hello World");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("greeting tag", "Hello", result.tags.at("greeting"));
+        t.assert_equal("name tag", "World", result.tags.at("name"));
+    });
+
+    t.test("duplicate tags overwrite", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("item", p.until(",")) + "," + p.tag("item", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("first,second");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("item tag", "second", result.tags.at("item"));
+    });
+
+    t.test("no tags extracted", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.rest() + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Hello");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("empty tags", 0u, result.tags.size());
+    });
+
+    t.test("structured extraction", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            auto header = p.tag("header", p.until("\n"));
+            auto body = p.tag("body", p.rest());
+            return header + "\n" + body + p.end();
+        });
+
+        auto result = parser.parse_and_extract("Title\nBody content here");
+        t.assert_true("success", result.result.success());
+        t.assert_equal("header", "Title", result.tags.at("header"));
+        t.assert_equal("body", "Body content here", result.tags.at("body"));
+    });
+
+    t.test("partial parse", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.tag("prefix", p.until(":")) + ":" + p.tag("value", p.rest()) + p.end();
+        });
+
+        auto result = parser.parse_and_extract("key:val", COMMON_PEG_PARSE_FLAG_LENIENT);
+        t.assert_true("not fail", !result.result.fail());
+        t.assert_equal("prefix tag", "key", result.tags.at("prefix"));
+        t.assert_equal("value tag", "val", result.tags.at("value"));
+    });
+
+    t.test("find in the middle", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+        });
+
+        std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call>{ \"fun_name\" : { \"arg\" : 1 }</tool_call>";
+        auto result = parser.parse_anywhere_and_extract(tpl);
+        t.assert_true("success", result.result.success());
+    });
+
+    t.test("fail find in the middle", [&](testing & t) {
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            return p.choice({ p.literal("{"), p.literal(":") }) + p.space() + p.literal("\"") + p.atomic(p.literal("fun_name"));
+        });
+
+        std::string tpl = "This is a very long jinja template string. We have tools. We will try to call them now: <tool_call><fun=fun_name><arg name=arg>1</arg></tool_call>";
+        auto result = parser.parse_anywhere_and_extract(tpl);
+        t.assert_true("failure", result.result.fail());
+    });
+
+    t.test("find function tag with name", [&](testing &t) {
+        std::string haystack = "\n<tool_call>\n<function=foofoo>\n<parameter=first>\nXXXX\n</parameter>\n<parameter=second>\nYYYY\n</parameter>\n</function>\n</tool_call>\n";
+        auto parser = build_tagged_peg_parser([](common_peg_parser_builder & p) {
+            std::string needle = "foofoo";
+            return p.tag("fun_marker", p.choice({
+            p.tag("fun_pre", p.literal("<") + p.until_one_of({ ">", needle })) + p.literal(needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("<")) + p.until(">") + p.literal(">")) + p.space(),
+            p.tag("fun_pre", p.literal("[") + p.until_one_of({ "]", needle })) + p.literal(needle) +
+                p.tag("fun_post", p.negate(p.space() + p.literal("[") + p.until("]") + p.literal("]")) + p.space()) }));
+        });
+        auto result = parser.parse_anywhere_and_extract(haystack);
+        t.assert_true("success", result.result.success());
+        t.assert_equal("fun_pre should be '<function='", "<function=", result.tags["fun_pre"]);
+        t.assert_equal("fun_post should be '>'", ">", result.tags["fun_post"]);
+    });
 }
diff --git a/tests/test-chat-template.cpp b/tests/test-chat-template.cpp
index dee2240ec2..6cc132131c 100644
--- a/tests/test-chat-template.cpp
+++ b/tests/test-chat-template.cpp
@@ -1,6 +1,8 @@
 #include <string>
+#include <utility>
 #include <vector>
 #include <sstream>
+#include <regex>
 #include <iostream>
 #include <fstream>
 #include <filesystem>
@@ -12,8 +14,6 @@
 
 #include "llama.h"
 #include "common.h"
-#include "minja/chat-template.hpp"
-#include "minja/minja.hpp"
 #include "chat.h"
 #include "jinja/runtime.h"
 #include "jinja/parser.h"
@@ -22,17 +22,16 @@
 
 using json = nlohmann::ordered_json;
 
-int main_automated_tests(void);
+static int main_automated_tests(void);
 
-void run_multiple(std::string dir_path, bool stop_on_first_failure, json input, bool use_common = false);
-void run_single(std::string contents, json input, bool use_common = false, const std::string & output_path = "");
+static void run_multiple(const std::string& dir_path, bool stop_on_first_failure, const json& input, bool use_common = false);
+static void run_single(const std::string& contents, json input, bool use_common = false, const std::string & output_path = "");
 
-
-
-std::string HELP = R"(
+static std::string HELP = R"(
 Usage: test-chat-template [OPTIONS] PATH_TO_TEMPLATE
 Options:
   -h, --help               Show this help message and exit.
+  --with-tools             Add a tool and a tool call to the default JSON input
   --json <path>            Path to the JSON input file.
   --stop-on-first-fail     Stop testing on the first failure (default: false).
   --no-common              Use direct Jinja engine instead of common chat templates (default: use common).
@@ -42,7 +41,7 @@ If PATH_TO_TEMPLATE is a directory, runs all .jinja files in that directory.
 If PATH_TO_TEMPLATE is omitted, runs automated tests (default CI mode).
 )";
 
-std::string DEFAULT_JSON = R"({
+static std::string DEFAULT_JSON = R"({
     "messages": [
         {
             "role": "user",
@@ -58,12 +57,65 @@ std::string DEFAULT_JSON = R"({
     "add_generation_prompt": true
 })";
 
+static std::string DEFAULT_JSON_WITH_TOOLS = R"({
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hello, how are you?"
+        },
+        {
+            "role": "assistant",
+            "content": "I am fine, thank you!"
+        },
+        {
+            "role": "user",
+            "content": "Call a tool!"
+        },
+        {
+            "role": "assistant",
+            "tool_calls": [
+                {
+                    "id": "call00001",
+                    "type": "function",
+                    "function": {
+                        "name": "test",
+                        "arguments": { "arg": "hello" }
+                    }
+                }
+            ]
+        }
+    ],
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "test",
+                "description": "Test",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "arg": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": ["arg"]
+            }
+        }
+    ],
+    "bos_token": "<s>",
+    "eos_token": "</s>",
+    "add_generation_prompt": true
+})";
+
+
 int main(int argc, char ** argv) {
     std::vector<std::string> args(argv, argv + argc);
 
     std::string tmpl_path;
     std::string json_path;
     std::string output_path;
+    std::string & json_to_use = DEFAULT_JSON;
     bool stop_on_first_fail = false;
     bool use_common = true;
 
@@ -71,9 +123,12 @@ int main(int argc, char ** argv) {
         if (args[i] == "--help" || args[i] == "-h") {
             std::cout << HELP << "\n";
             return 0;
-        } else if (args[i] == "--json" && i + 1 < args.size()) {
+        }
+        if (args[i] == "--json" && i + 1 < args.size()) {
             json_path = args[i + 1];
             i++;
+        } else if (args[i] == "--with-tools") {
+            json_to_use = DEFAULT_JSON_WITH_TOOLS;
         } else if (args[i] == "--stop-on-first-fail") {
             stop_on_first_fail = true;
         } else if (args[i] == "--output" && i + 1 < args.size()) {
@@ -106,7 +161,7 @@ int main(int argc, char ** argv) {
             std::istreambuf_iterator<char>());
         input_json = json::parse(content);
     } else {
-        input_json = json::parse(DEFAULT_JSON);
+        input_json = json::parse(json_to_use);
     }
 
     std::filesystem::path p(tmpl_path);
@@ -126,7 +181,7 @@ int main(int argc, char ** argv) {
     return 0;
 }
 
-void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, bool use_common) {
+void run_multiple(const std::string& dir_path, bool stop_on_first_fail, const json& input, bool use_common) {
     std::vector<std::string> failed_tests;
 
     // list all files in models/templates/ and run each
@@ -163,10 +218,10 @@ void run_multiple(std::string dir_path, bool stop_on_first_fail, json input, boo
 
 static std::string normalize_newlines(const std::string & s) {
 #ifdef _WIN32
-    static const std::regex nl_regex("\r\n");
-    return std::regex_replace(s, nl_regex, "\n");
+  static const std::regex nl_regex("\r\n");
+  return std::regex_replace(s, nl_regex, "\n");
 #else
-    return s;
+  return s;
 #endif
 }
 
@@ -181,7 +236,7 @@ static std::string format_using_common(
     common_chat_templates_inputs inputs;
     inputs.use_jinja = true;
     inputs.messages = messages;
-    inputs.tools = tools;
+    inputs.tools = std::move(tools);
     inputs.add_generation_prompt = true;
     auto output = common_chat_templates_apply(tmpls.get(), inputs).prompt;
     output = normalize_newlines(output);
@@ -210,7 +265,7 @@ static jinja::value_string format_using_direct_engine(
 
     jinja::runtime runtime(ctx);
     const jinja::value results = runtime.execute(ast);
-    auto parts = runtime.gather_string_parts(results);
+    auto parts = jinja::runtime::gather_string_parts(results);
 
     std::cout << "\n=== RESULTS ===\n";
     for (const auto & part : parts->as_string().parts) {
@@ -221,7 +276,7 @@ static jinja::value_string format_using_direct_engine(
 }
 
 
-void run_single(std::string contents, json input, bool use_common, const std::string & output_path) {
+void run_single(const std::string& contents, json input, bool use_common, const std::string & output_path) {
     jinja::enable_debug(true);
 
     jinja::value_string output_parts;
@@ -281,7 +336,7 @@ static common_chat_msg simple_msg(const std::string & role, const std::string &
 int main_automated_tests(void) {
     // jinja::enable_debug(true);
 
-    llama_chat_message conversation[] = {
+    std::vector<llama_chat_message> conversation {
         {"system", "You are a helpful assistant"},
         {"user", "Hello"},
         {"assistant", "Hi there"},
@@ -289,54 +344,16 @@ int main_automated_tests(void) {
         {"assistant", "   I am an assistant   "},
         {"user", "Another question"},
     };
-    size_t message_count = 6;
-    std::vector<std::string> templates = {
-        // teknium/OpenHermes-2.5-Mistral-7B
-       "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
-        // mistralai/Mistral-7B-Instruct-v0.2
-        "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
-        // TheBloke/FusionNet_34Bx2_MoE-AWQ
-        "{%- for idx in range(0, messages|length) -%}\\n{%- if messages[idx]['role'] == 'user' -%}\\n{%- if idx > 1 -%}\\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\\n{%- else -%}\\n{{- messages[idx]['content'] + ' [/INST]' -}}\\n{%- endif -%}\\n{% elif messages[idx]['role'] == 'system' %}\\n{{- '[INST] <<SYS>>\\\\n' + messages[idx]['content'] + '\\\\n<</SYS>>\\\\n\\\\n' -}}\\n{%- elif messages[idx]['role'] == 'assistant' -%}\\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\\n{% endif %}\\n{% endfor %}",
-        // bofenghuang/vigogne-2-70b-chat
-        "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'Vous êtes Vigogne, un assistant IA créé par Zaion Lab. Vous suivez extrêmement bien les instructions. Aidez autant que vous le pouvez.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\\\n' + system_message + '\\\\n<</SYS>>\\\\n\\\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\\\n' + content.strip() + '\\\\n<</SYS>>\\\\n\\\\n' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
-        // mlabonne/AlphaMonarch-7B
-        "{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
-        // google/gemma-7b-it
-        "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
-        // OrionStarAI/Orion-14B-Chat
-        "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
-        // openchat/openchat-3.5-0106
-        // The included chat_template differs from the author's suggestions here: https://huggingface.co/openchat/openchat_3.5/discussions/5#65448109b4a3f3a2f486fd9d
-        // So we match against the included template but implement the suggested version.
-        "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
-        // deepseek-ai/deepseek-coder-33b-instruct
-        "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
-        // eachadea/vicuna-13b-1.1
-        // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-        "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{- '' + message['content'] + '\n\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-        // Orca-Vicuna
-        // No template included in tokenizer_config.json, so this template likely needs to be manually set.
-        "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
-        // CohereForAI/c4ai-command-r-plus
-        "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
-        // Llama-3
-        "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
-        //Phi-3-mini
-        "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-        //Phi-3-small
-        "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
-        //Phi-3-medium
-        "{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
-        //Phi-3-vision
-        "{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{- '<|assistant|>\n' -}}{% endif %}",
-        // ChatGLM3
-        "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-        // ChatGLM4
-        u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
-        u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
-        // DeepSeek-V2
-        "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
+
+    // std::string wrong = /* .template_str= */ u8"[gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的，你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}\n\n## {{ tool['function']['name'] }}\n\n{{ tool['function'] | tojson(indent=4) }}\n......{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}";
+    struct TestCase {
+        std::string name;
+        std::string template_str;
+        std::string expected_output;
+        std::string expected_output_jinja;
+        std::string bos_token = "";
+        std::string eos_token = "";
+        bool supported_with_jinja = true;
     };
     std::vector<TestCase> test_cases {
         {
@@ -599,23 +616,23 @@ int main_automated_tests(void) {
     supported_tmpl.resize(res);
     res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
     std::cout << "Built-in chat templates:\n";
-    for (auto tmpl : supported_tmpl) {
+    for (const auto *tmpl : supported_tmpl) {
         std::cout << "  " << tmpl << "\n";
     }
 
     // test invalid chat template
-    res = llama_chat_apply_template(nullptr, "INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
+    res = llama_chat_apply_template("INVALID TEMPLATE", conversation.data(), conversation.size(), true, formatted_chat.data(), formatted_chat.size());
     assert(res < 0);
+    const auto add_generation_prompt = true;
 
     for (const auto & test_case : test_cases) {
         std::cout << "\n\n=== " << test_case.name << " ===\n\n";
         formatted_chat.resize(1024);
         res = llama_chat_apply_template(
-            nullptr,
-            custom_template.c_str(),
-            conversation,
-            message_count,
-            true,
+            test_case.template_str.c_str(),
+            conversation.data(),
+            conversation.size(),
+            add_generation_prompt,
             formatted_chat.data(),
             formatted_chat.size()
         );
@@ -631,6 +648,7 @@ int main_automated_tests(void) {
     }
 
     std::vector<common_chat_msg> messages;
+    messages.reserve(conversation.size());
     for (const auto & msg : conversation) {
         messages.push_back(simple_msg(msg.role, msg.content));
     }
@@ -661,46 +679,6 @@ int main_automated_tests(void) {
         }
     }
 
-    // TODO: llama_chat_format_single will be deprecated, remove these tests later
-
-    // test llama_chat_format_single for system message
-    std::cout << "\n\n=== llama_chat_format_single (system message) ===\n\n";
-    std::vector<common_chat_msg> chat2;
-    common_chat_msg sys_msg{"system", "You are a helpful assistant"};
-
-    auto fmt_sys = [&](std::string tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
-        auto output = common_chat_format_single(tmpls.get(), chat2, sys_msg, false, /* use_jinja= */ false);
-        std::cout << "fmt_sys(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_sys("chatml") == "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n");
-    assert(fmt_sys("llama2") == "[INST] You are a helpful assistant\n");
-    assert(fmt_sys("gemma")  == ""); // for gemma, system message is merged with user message
-    assert(fmt_sys("llama3") == "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|>");
-
-
-    // test llama_chat_format_single for user message
-    std::cout << "\n\n=== llama_chat_format_single (user message) ===\n\n";
-    chat2.push_back({"system", "You are a helpful assistant"});
-    chat2.push_back({"user", "Hello"});
-    chat2.push_back({"assistant", "I am assistant"});
-    common_chat_msg new_msg{"user", "How are you"};
-
-    auto fmt_single = [&](std::string tmpl_str) {
-        auto tmpls = common_chat_templates_init(/* model= */ nullptr, tmpl_str);
-        auto output = common_chat_format_single(tmpls.get(), chat2, new_msg, true, /* use_jinja= */ false);
-        std::cout << "fmt_single(" << tmpl_str << ") : " << output << "\n";
-        std::cout << "-------------------------\n";
-        return output;
-    };
-    assert(fmt_single("chatml") == "\n<|im_start|>user\nHow are you<|im_end|>\n<|im_start|>assistant\n");
-    assert(fmt_single("llama2") == "[INST] How are you [/INST]");
-    assert(fmt_single("gemma")  == "\n<start_of_turn>user\nHow are you<end_of_turn>\n<start_of_turn>model\n");
-    assert(fmt_single("llama3") == "<|start_header_id|>user<|end_header_id|>\n\nHow are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n");
-    // assert(fmt_single("gigachat") == "user<|role_sep|>How are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>");
-
     std::cout << "\nOK: All tests passed successfully.\n";
 
     return 0;
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index f3d19118b5..4f45826ea5 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -5,18 +5,22 @@
 //
 //    cmake -B build && cmake --build build --parallel && ./build/bin/test-chat ../minja/build/tests/*.jinja 2>/dev/null
 //
+#include "../src/llama-grammar.h"
+#include "../src/unicode.h"
+#include "chat-auto-parser.h"
 #include "chat.h"
-
+#include "common.h"
+#include "ggml.h"
 #include "log.h"
 
-#include "../src/unicode.h"
-#include "../src/llama-grammar.h"
-
-#include <nlohmann/json.hpp>
-
+#include <algorithm>
+#include <exception>
 #include <fstream>
-#include <iostream>
 #include <functional>
+#include <iostream>
+#include <nlohmann/json.hpp>
+#include <set>
+#include <stdexcept>
 #include <string>
 
 using json = nlohmann::ordered_json;
@@ -33,6 +37,7 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg_diff &
     os << "}";
     return os;
 }
+
 // operator<< for vector<common_chat_msg_diff>:
 static std::ostream & operator<<(std::ostream & os, const std::vector<common_chat_msg_diff> & diffs) {
     os << "[\n";
@@ -42,6 +47,7 @@ static std::ostream & operator<<(std::ostream & os, const std::vector<common_cha
     os << "]";
     return os;
 }
+
 static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg) {
     os << "{ role: " << msg.role << "; ";
     os << "content: " << msg.content << "; ";
@@ -53,7 +59,8 @@ static std::ostream & operator<<(std::ostream & os, const common_chat_msg & msg)
     os << "reasoning_content: " << msg.reasoning_content << "; ";
     os << "tool_calls: [\n";
     for (const auto & tool_call : msg.tool_calls) {
-        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id << " },\n";
+        os << "  { name: " << tool_call.name << "; arguments: " << tool_call.arguments << "; id: " << tool_call.id
+           << " },\n";
     }
     os << "]";
     os << "}";
@@ -70,29 +77,29 @@ static common_chat_msg normalize(const common_chat_msg & msg) {
         try {
             tool_call.arguments = json::parse(tool_call.arguments).dump();
         } catch (const std::exception &) {
-            // Do nothing
         }
     }
     return normalized;
 }
 
-
-template <>
-bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
+template <> bool equals(const common_chat_msg & expected, const common_chat_msg & actual) {
     return normalize(expected) == normalize(actual);
 }
 
 template <class T> static void assert_equals(const T & expected, const T & actual) {
     if (!equals(expected, actual)) {
-        std::cerr << "Expected:```\n" << expected << "\n```" << std::endl;
-        std::cerr << "Actual:```\n" << actual << "\n```" << std::endl;
-        std::cerr << std::flush;
+        std::ostringstream oss_expected;
+        oss_expected << expected;
+        std::ostringstream oss_actual;
+        oss_actual << actual;
+        LOG_ERR("Expected: %s\n", oss_expected.str().c_str());
+        LOG_ERR("Actual: %s\n", oss_actual.str().c_str());
+        common_log_flush(common_log_main());
         throw std::runtime_error("Test failed");
     }
 }
 
 static std::string read_file(const std::string & path) {
-    std::cerr << "# Reading: " << path << '\n' << std::flush;
     std::ifstream fs(path, std::ios_base::binary);
     if (!fs.is_open()) {
         fs = std::ifstream("../" + path, std::ios_base::binary);
@@ -118,6 +125,207 @@ static std::unique_ptr<llama_grammar> build_grammar(const std::string & grammar_
         llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root", false, nullptr, 0, nullptr, 0));
 }
 
+// Helper to format a code point as a readable string
+static std::string format_codepoint(uint32_t cp) {
+    if (cp >= 32 && cp < 127) {
+        return std::string("'") + static_cast<char>(cp) + "'";
+    } else if (cp == '\n') {
+        return "'\\n'";
+    } else if (cp == '\r') {
+        return "'\\r'";
+    } else if (cp == '\t') {
+        return "'\\t'";
+    } else {
+        return "U+" + std::to_string(cp);
+    }
+}
+
+// Helper to format expected element from grammar stack
+static std::string format_expected_element(const llama_grammar_rules & /* rules*/, const llama_grammar_element * elem) {
+    if (!elem) {
+        return "<end>";
+    }
+
+    switch (elem->type) {
+        case LLAMA_GRETYPE_END:
+            return "<end of rule>";
+        case LLAMA_GRETYPE_ALT:
+            return "<alternative>";
+        case LLAMA_GRETYPE_RULE_REF:
+            {
+                // Find rule name - just show rule ID for now
+                return "<rule-" + std::to_string(elem->value) + ">";
+            }
+        case LLAMA_GRETYPE_CHAR:
+            {
+                std::string                   result;
+                const llama_grammar_element * pos   = elem;
+                bool                          first = true;
+
+                do {
+                    if (!first) {
+                        result += " | ";
+                    }
+                    first = false;
+
+                    if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+                        // Range like [a-z]
+                        result += "[" + format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value) + "]";
+                        pos += 2;
+                    } else {
+                        result += format_codepoint(pos->value);
+                        pos += 1;
+                    }
+                } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+                return result;
+            }
+        case LLAMA_GRETYPE_CHAR_NOT:
+            {
+                std::string                   result = "[^";
+                const llama_grammar_element * pos    = elem;
+                bool                          first  = true;
+
+                do {
+                    if (!first) {
+                        result += " ";
+                    }
+                    first = false;
+
+                    if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
+                        result += format_codepoint(pos->value) + "-" + format_codepoint(pos[1].value);
+                        pos += 2;
+                    } else {
+                        result += format_codepoint(pos->value);
+                        pos += 1;
+                    }
+                } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
+
+                return result + "]";
+            }
+        case LLAMA_GRETYPE_CHAR_ANY:
+            return "<any char>";
+        case LLAMA_GRETYPE_TOKEN:
+            return "<token-" + std::to_string(elem->value) + ">";
+        case LLAMA_GRETYPE_TOKEN_NOT:
+            return "<not-token-" + std::to_string(elem->value) + ">";
+        default:
+            return "<unknown>";
+    }
+}
+
+// Get description of what the grammar expects at current position
+static std::string get_expected_description(const llama_grammar_rules & rules, const llama_grammar_stacks & stacks) {
+    if (stacks.empty()) {
+        return "<no valid continuations>";
+    }
+
+    std::string           result;
+    std::set<std::string> seen;
+
+    for (const auto & stack : stacks) {
+        if (stack.empty()) {
+            if (seen.insert("<end>").second) {
+                if (!result.empty()) {
+                    result += " OR ";
+                }
+                result += "<end>";
+            }
+            continue;
+        }
+
+        const llama_grammar_element * elem = stack.back();
+        std::string                   desc = format_expected_element(rules, elem);
+        if (seen.insert(desc).second) {
+            if (!result.empty()) {
+                result += " OR ";
+            }
+            result += desc;
+        }
+    }
+
+    return result;
+}
+
+// Result of a detailed grammar match attempt
+struct grammar_match_result {
+    bool        success            = false;  // Did the string fully match the grammar?
+    size_t      matched_bytes      = 0;      // Bytes successfully matched before failure
+    size_t      matched_codepoints = 0;      // Codepoints successfully matched before failure
+    size_t      total_bytes        = 0;      // Total bytes in input
+    size_t      total_codepoints   = 0;      // Total codepoints in input
+    std::string matched_prefix;              // The portion that was successfully matched
+    std::string failing_char;                // The character that caused failure (if any)
+    std::string expected_description;        // What the grammar expected at failure point
+    bool        incomplete = false;          // True if matched all input but grammar expects more
+};
+
+// Detailed version of match_string that returns failure information
+static grammar_match_result match_string_detailed(const std::string & input, llama_grammar * grammar) {
+    grammar_match_result result;
+    result.total_bytes = input.size();
+
+    const auto cpts         = unicode_cpts_from_utf8(input);
+    result.total_codepoints = cpts.size();
+
+    auto &       stacks_cur = llama_grammar_get_stacks(grammar);
+    const auto & rules      = llama_grammar_get_rules(grammar);
+
+    size_t byte_pos = 0;
+
+    for (size_t i = 0; i < cpts.size(); i++) {
+        const auto & cpt = cpts[i];
+
+        // Get expected before accepting (for error reporting)
+        std::string expected_before = get_expected_description(rules, stacks_cur);
+
+        llama_grammar_accept(grammar, cpt);
+
+        // Calculate byte position for this codepoint
+        size_t cpt_bytes = 0;
+        if (cpt < 0x80) {
+            cpt_bytes = 1;
+        } else if (cpt < 0x800) {
+            cpt_bytes = 2;
+        } else if (cpt < 0x10000) {
+            cpt_bytes = 3;
+        } else {
+            cpt_bytes = 4;
+        }
+
+        if (stacks_cur.empty()) {
+            // Grammar failed to match at this point
+            result.matched_bytes        = byte_pos;
+            result.matched_codepoints   = i;
+            result.matched_prefix       = input.substr(0, byte_pos);
+            result.failing_char         = format_codepoint(cpt);
+            result.expected_description = expected_before;
+            result.incomplete           = false;
+            return result;
+        }
+
+        byte_pos += cpt_bytes;
+    }
+
+    // All input matched - check if grammar is complete
+    result.matched_bytes      = input.size();
+    result.matched_codepoints = cpts.size();
+    result.matched_prefix     = input;
+
+    if (std::any_of(stacks_cur.begin(), stacks_cur.end(), [](const auto & stack) { return stack.empty(); })) {
+        // An empty stack means that the grammar has been completed
+        result.success    = true;
+        result.incomplete = false;
+    } else {
+        // Grammar expects more input
+        result.success              = false;
+        result.incomplete           = true;
+        result.expected_description = get_expected_description(rules, stacks_cur);
+    }
+
+    return result;
+}
+
 // TODO: extract to common helper (copied from test-grammar-integration.cpp)
 static bool match_string(const std::string & input, llama_grammar * grammar) {
     const auto cpts = unicode_cpts_from_utf8(input);
@@ -146,11 +354,13 @@ static std::string renormalize_json(const std::string & json_str) {
         auto json_obj = json::parse(json_str);
         return json_obj.dump();
     } catch (const std::exception & e) {
-        std::cerr << "Failed to parse JSON: " << e.what() << '\n';
-        return json_str;
+        return "";  // ignore parial JSON contents for comparison purposes
     }
 }
-static void assert_msg_equals(const common_chat_msg & expected, const common_chat_msg & actual, bool ignore_whitespace_differences = false) {
+
+static void assert_msg_equals(const common_chat_msg & expected,
+                              const common_chat_msg & actual,
+                              bool                    ignore_whitespace_differences = false) {
     assert_equals(expected.role, actual.role);
     if (ignore_whitespace_differences) {
         assert_equals(string_strip(expected.content), string_strip(actual.content));
@@ -183,7 +393,7 @@ static void assert_msg_equals(const common_chat_msg & expected, const common_cha
     }
 }
 
-common_chat_tool special_function_tool {
+static common_chat_tool special_function_tool{
     /* .name = */ "special_function",
     /* .description = */ "I'm special",
     /* .parameters = */ R"({
@@ -197,7 +407,7 @@ common_chat_tool special_function_tool {
         "required": ["arg1"]
     })",
 };
-common_chat_tool special_function_tool_with_optional_param {
+static common_chat_tool special_function_tool_with_optional_param{
     /* .name = */ "special_function_with_opt",
     /* .description = */ "I'm special but have optional stuff",
     /* .parameters = */ R"({
@@ -215,7 +425,25 @@ common_chat_tool special_function_tool_with_optional_param {
         "required": ["arg1"]
     })",
 };
-common_chat_tool python_tool {
+
+static common_chat_tool empty_args_tool{
+    /* .name = */ "empty_args",
+    /* .description = */ "A tool that takes no arguments",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {}
+    })",
+};
+
+static common_chat_tool empty_args_tool_no_properties{
+    /* .name = */ "empty_args_no_props",
+    /* .description = */ "A tool that takes no arguments and has no properties",
+    /* .parameters = */ R"({
+        "type": "object"
+    })",
+};
+
+static common_chat_tool python_tool{
     /* .name = */ "python",
     /* .description = */ "an ipython interpreter",
     /* .parameters = */ R"({
@@ -229,7 +457,53 @@ common_chat_tool python_tool {
         "required": ["code"]
     })",
 };
-common_chat_tool todo_list_tool {
+
+static common_chat_tool html_tool{
+    /* .name = */ "html",
+    /* .description = */ "an html validator",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "markup": {
+                "type": "string",
+                "description": "HTML markup to validate."
+            }
+        },
+        "required": ["markup"]
+    })",
+};
+
+static common_chat_tool get_time_tool{
+    /* .name = */ "get_time",
+    /* .description = */ "Get the current time in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool get_weather_tool{
+    /* .name = */ "get_weather",
+    /* .description = */ "Get the current weather in a city",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "city": {
+                "type": "string",
+                "description": "City name"
+            }
+        },
+        "required": ["city"]
+    })",
+};
+
+static common_chat_tool todo_list{
     /* .name = */ "todo_list",
     /* .description = */ "Create or update the todo list",
     /* .parameters = */ R"({
@@ -243,267 +517,280 @@ common_chat_tool todo_list_tool {
         "required": ["todos"]
     })",
 };
-common_chat_tool code_interpreter_tool {
-    /* .name = */ "code_interpreter",
-    /* .description = */ "an ipython interpreter",
+
+static common_chat_tool edit_tool{
+    /* .name = */ "edit",
+    /* .description = */ "Edit file",
     /* .parameters = */ R"({
         "type": "object",
         "properties": {
-            "code": {
+            "filename": {
                 "type": "string",
-                "description": "Python code to execute."
+                "description": "Path of file to edit"
+            },
+            "oldString": {
+                "type": "string",
+                "description": "String to replace"
+            },
+            "newString": {
+                "type": "string",
+                "description": "New (replacement) value"
             }
         },
-        "required": ["code"]
+        "required": ["filename", "oldString", "newString"]
     })",
 };
-std::vector<common_chat_tool> tools           { special_function_tool, special_function_tool_with_optional_param, python_tool };
-std::vector<common_chat_tool> llama_3_1_tools { special_function_tool, code_interpreter_tool };
 
-struct delta_data {
-    std::string        delta;
-    common_chat_params params;
+static common_chat_tool magic_tool{
+    /* .name = */ "magic",
+    /* .description = */ "Magic tool that takes a hash",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": "string"
+            },
+            "ref": {
+                "type": "string"
+            }
+        },
+        "required": ["name", "ref"]
+    })",
 };
 
-static common_chat_msg simple_assist_msg(const std::string & content, const std::string & reasoning_content = "", const std::string & tool_name = "", const std::string & arguments = "", const std::string & id = "") {
-    common_chat_msg msg;
-    msg.role = "assistant";
-    msg.content = content;
-    msg.reasoning_content = reasoning_content;
-    if (!tool_name.empty()) {
-        msg.tool_calls.push_back({ tool_name, arguments, id });
-    }
-    return msg;
-}
-
-static delta_data init_delta(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                             const common_chat_msg & user_message,
-                             const common_chat_msg & delta_message,
-                             const std::vector<common_chat_tool> & tools,
-                             const common_chat_tool_choice & tool_choice) {
-    common_chat_templates_inputs inputs;
-    inputs.parallel_tool_calls = true;
-    inputs.messages.push_back(user_message);
-    inputs.tools       = tools;
-    inputs.tool_choice = tool_choice;
-    auto params_prefix = common_chat_templates_apply(tmpls, inputs);
-
-    inputs.messages.push_back(delta_message);
-    inputs.add_generation_prompt = false;
-    auto params_full             = common_chat_templates_apply(tmpls, inputs);
-
-    std::string prefix = params_prefix.prompt;
-    std::string full   = params_full.prompt;
-
-    if (full == prefix) {
-        throw std::runtime_error("Full message is the same as the prefix");
-    }
-
-    size_t common_prefix_length = 0;
-    for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
-        if (prefix[i] != full[i]) {
-            break;
-        }
-        if (prefix[i] == '<') {
-            // DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
-            // but it removes thinking tags for past messages.
-            // The prefix and full strings diverge at <think> vs. <｜tool▁calls▁begin｜>, we avoid consuming the leading <.
-            continue;
-        }
-        common_prefix_length = i + 1;
-    }
-    auto delta = full.substr(common_prefix_length);
+static common_chat_tool magic_int_tool{
+    /* .name = */ "magic_int",
+    /* .description = */ "Magic tool that takes a hash",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "ref": {
+                "type": "integer"
+            },
+            "name": {
+                "type": "string"
+            }
+        },
+        "required": ["ref"]
+    })",
+};
 
-    // Strip end tokens
-    for (const auto & end_token : end_tokens) {
-        // rfind to find the last occurrence
-        auto pos = delta.rfind(end_token);
-        if (pos != std::string::npos) {
-            delta = delta.substr(0, pos);
-            break;
-        }
-    }
-    return { delta, params_full };
-}
+static common_chat_tool amount_tool{
+    /* .name = */ "amount",
+    /* .description = */ "Amount converter",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "orig": {
+                "type": "number"
+            }
+        },
+        "required": ["orig"]
+    })",
+};
 
-/*
-  Applies the template to 1 user message w/ add_generation_prompt=true, then w/ the test message w/ add_generation_prompt=false,
-  gets the diff, removes any end tokens and parses the result w/ the grammar, checking that
-  the parsed message is the same as the test_message
-*/
-static void test_templates(const struct common_chat_templates * tmpls, const std::vector<std::string> & end_tokens,
-                          const common_chat_msg & test_message,
-                          const std::vector<common_chat_tool> & tools = {},
-                          const std::string & expected_delta = "",
-                          bool expect_grammar_triggered = true,
-                          bool test_grammar_if_triggered = true,
-                          common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE,
-                          bool ignore_whitespace_differences = false
-                        ) {
-    common_chat_msg user_message;
-    user_message.role = "user";
-    user_message.content = "Hello, world!";
-
-    for (const auto & tool_choice : std::vector<common_chat_tool_choice> {COMMON_CHAT_TOOL_CHOICE_AUTO, COMMON_CHAT_TOOL_CHOICE_REQUIRED}) {
-        auto data = init_delta(tmpls, end_tokens, user_message, test_message, tools, tool_choice);
-        if (!expected_delta.empty()) {
-            if (ignore_whitespace_differences) {
-                assert_equals(string_strip(expected_delta), string_strip(data.delta));
-            } else {
-                assert_equals(expected_delta, data.delta);
+static common_chat_tool toggle_tool{
+    /* .name = */ "toggle",
+    /* .description = */ "Toggle a feature",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "enabled": {
+                "type": "boolean",
+                "description": "Whether to enable the feature"
             }
-        }
+        },
+        "required": ["enabled"]
+    })",
+};
 
-        if (expect_grammar_triggered) {
-            // TODO @ngxson : refactor common_chat_parse to avoid passing format/reasoning_format every time
-            common_chat_parser_params params;
-            params.format = data.params.format;
-            params.reasoning_format = reasoning_format;
-            const auto msg = common_chat_parse(data.delta, /* is_partial= */ false, params);
-            assert_msg_equals(test_message, msg, ignore_whitespace_differences);
-        }
+static common_chat_tool nullable_tool{
+    /* .name = */ "set_nullable",
+    /* .description = */ "Set a nullable value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "value": {
+                "type": "null",
+                "description": "A null value"
+            }
+        },
+        "required": ["value"]
+    })",
+};
 
-        if (!test_message.tool_calls.empty()) {
-            GGML_ASSERT(!data.params.grammar.empty());
-        }
-        if (!data.params.grammar.empty()) {
-            auto grammar = build_grammar(data.params.grammar);
-            if (!grammar) {
-                throw std::runtime_error("Failed to build grammar");
+static common_chat_tool config_tool{
+    /* .name = */ "set_config",
+    /* .description = */ "Set configuration",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "config": {
+                "type": "object",
+                "description": "Configuration dict"
             }
-            auto earliest_trigger_pos = std::string::npos;
-            auto constrained = data.delta;
-            for (const auto & trigger : data.params.grammar_triggers) {
-                size_t pos = std::string::npos;
-                std::smatch match;
-                switch (trigger.type) {
-                    case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
-                    {
-                        const auto & word = trigger.value;
-                        pos = constrained.find(word);
-                        break;
-                    }
-                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_search(constrained, match, std::regex(pattern))) {
-                            pos = match.position(1);
-                        }
-                        break;
-                    }
-                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
-                    {
-                        const auto & pattern = trigger.value;
-                        if (std::regex_match(constrained, match, std::regex(pattern))) {
-                            auto mpos = std::string::npos;
-                            for (size_t i = 1; i < match.size(); ++i) {
-                                if (match[i].length() > 0) {
-                                    mpos = match.position(i);
-                                    break;
-                                }
-                            }
-                            if (mpos == std::string::npos) {
-                                mpos = match.position(0);
-                            }
-                            pos = mpos;
-                        }
-                        break;
+        },
+        "required": ["config"]
+    })",
+};
+
+static common_chat_tool imaginary_number_tool{
+    /* .name = */ "imaginary_number",
+    /* .description = */ "Imaginary number converter",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "number": {
+                "type": "object",
+                "properties": {
+                    "real": {
+                        "type": "number"
+                    },
+                    "imaginary": {
+                        "type": "number"
                     }
-                    default:
-                        throw std::runtime_error("Unknown trigger type");
-                }
-                if (pos == std::string::npos) {
-                    continue;
-                }
-                if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
-                    earliest_trigger_pos = pos;
-                }
+                },
+                "required": ["real", "imaginary"]
             }
-            auto grammar_triggered = false;
-            if (earliest_trigger_pos != std::string::npos) {
-                constrained = constrained.substr(earliest_trigger_pos);
-                grammar_triggered = true;
+        },
+        "required": ["number"]
+    })",
+};
+
+static common_chat_tool nullable_string_tool{
+    /* .name = */ "set_nullable_str",
+    /* .description = */ "Set a nullable string value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": ["string", "null"],
+                "description": "A nullable string"
             }
-            if (data.params.grammar_lazy) {
-                assert_equals(expect_grammar_triggered, grammar_triggered);
+        },
+        "required": ["name"]
+    })",
+};
+
+static common_chat_tool nullable_string_null_first_tool{
+    /* .name = */ "set_nullable_str_nf",
+    /* .description = */ "Set a nullable string value with null first in type array",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "name": {
+                "type": ["null", "string"],
+                "description": "A nullable string with null first"
             }
+        },
+        "required": ["name"]
+    })",
+};
 
-            if (grammar_triggered && test_grammar_if_triggered && !match_string(constrained, grammar.get())) {
-                throw std::runtime_error("Failed to match delta against grammar:\n\n" + data.delta +
-                    "\n\nConstrained: " + constrained +
-                    "\n\nGrammar: " + data.params.grammar);
+static common_chat_tool nullable_int_tool{
+    /* .name = */ "set_nullable_int",
+    /* .description = */ "Set a nullable integer value",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "count": {
+                "type": ["integer", "null"],
+                "description": "A nullable integer"
             }
-        }
-    }
-}
+        },
+        "required": ["count"]
+    })",
+};
 
-/**
- * Test if streaming=true is consistant with streaming=false for given partial parser
- * Also test if there is any problem with partial message
- */
-template <typename T>
-static void test_parser_with_streaming(const common_chat_msg & expected, const std::string & raw_message, T parse_msg) {
-    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
-        auto len = s.size();
-        if (len == 0) return 0;
-        auto i = len;
-        for (size_t back = 0; back < 4 && i > 0; ++back) {
-            --i;
-            unsigned char c = s[i];
-            if ((c & 0x80) == 0) {
-                return len;
-            } else if ((c & 0xC0) == 0xC0) {
-                size_t expected_len = 0;
-                if ((c & 0xE0) == 0xC0) expected_len = 2;
-                else if ((c & 0xF0) == 0xE0) expected_len = 3;
-                else if ((c & 0xF8) == 0xF0) expected_len = 4;
-                else return i;
-                if (len - i >= expected_len) {
-                    return len;
-                } else {
-                    return i;
-                }
+static common_chat_tool enum_no_type_tool{
+    /* .name = */ "set_unit",
+    /* .description = */ "Set a temperature unit",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "unit": {
+                "enum": ["celsius", "fahrenheit"],
+                "description": "Temperature unit"
             }
-        }
-        return len - std::min(len, size_t(3));
-    };
-    constexpr auto utf8_truncate_safe_view = [utf8_truncate_safe_len](const std::string_view s) {
-        return s.substr(0, utf8_truncate_safe_len(s));
-    };
+        },
+        "required": ["unit"]
+    })",
+};
 
-    auto merged = simple_assist_msg("");
-    auto last_msg = parse_msg("");
-    for (size_t i = 1; i <= raw_message.size(); ++i) {
-        auto curr_msg = parse_msg(std::string(utf8_truncate_safe_view(std::string_view(raw_message).substr(0, i))));
-        if (curr_msg == simple_assist_msg("")) continue;
-        LOG_INF("Streaming msg: %s\n", common_chat_msgs_to_json_oaicompat({curr_msg}).dump().c_str());
-        for (auto diff: common_chat_msg_diff::compute_diffs(last_msg, curr_msg)) {
-            LOG_INF("Streaming diff: %s\n", common_chat_msg_diff_to_json_oaicompat(diff).dump().c_str());
-            if (!diff.reasoning_content_delta.empty()) {
-                merged.reasoning_content += diff.reasoning_content_delta;
+static common_chat_tool string_param_tool{
+    /* .name = */ "string_param",
+    /* .description = */ "Tool with string parameter for testing",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "text": {
+                "type": "string",
+                "description": "A text parameter"
             }
-            if (!diff.content_delta.empty()) {
-                merged.content += diff.content_delta;
-            }
-            if (diff.tool_call_index != std::string::npos) {
-                if (!diff.tool_call_delta.name.empty()) {
-                    merged.tool_calls.push_back({diff.tool_call_delta.name, "", ""});
-                }
-                if (!diff.tool_call_delta.arguments.empty()) {
-                    GGML_ASSERT(!merged.tool_calls.empty());
-                    merged.tool_calls.back().arguments += diff.tool_call_delta.arguments;
-                }
+        },
+        "required": []
+    })",
+};
+
+static common_chat_tool quoted_unquoted_tool{
+    /* .name = */ "quoted_unquoted",
+    /* .description = */ "Tool with two string parameters, one for quoted string, one for unquoted",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "quoted": {
+                "type": "string",
+                "description": "Quoted value"
+            },
+            "unquoted": {
+                "type": "string",
+                "description": "Unquoted value"
             }
-            LOG_INF("Streaming merged: %s\n", common_chat_msgs_to_json_oaicompat({merged}).dump().c_str());
-        }
-        assert_msg_equals(curr_msg, merged, true);
-        last_msg = curr_msg;
-    }
-    assert_msg_equals(expected, parse_msg(raw_message), true);
-    assert_msg_equals(expected, merged, true);
-}
+        },
+        "required": ["quoted", "unquoted"]
+    })",
+};
+
+
+static common_chat_tool tool_2req_4opt{
+    /* .name = */ "tool_2req_4opt",
+    /* .description = */ "Tool with 2 required and 4 optional params",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "req1": { "type": "string", "description": "Required string" },
+            "req2": { "type": "integer", "description": "Required int" },
+            "opt1": { "type": "string", "description": "Optional string 1" },
+            "opt2": { "type": "integer", "description": "Optional int 1" },
+            "opt3": { "type": "string", "description": "Optional string 2" },
+            "opt4": { "type": "integer", "description": "Optional int 2" }
+        },
+        "required": ["req1", "req2"]
+    })",
+};
+
+static common_chat_tool tool_2req_5opt{
+    /* .name = */ "tool_2req_5opt",
+    /* .description = */ "Tool with 2 required and 5 optional params",
+    /* .parameters = */ R"({
+        "type": "object",
+        "properties": {
+            "req1": { "type": "string", "description": "Required string" },
+            "req2": { "type": "integer", "description": "Required int" },
+            "opt1": { "type": "string", "description": "Optional string 1" },
+            "opt2": { "type": "integer", "description": "Optional int 1" },
+            "opt3": { "type": "string", "description": "Optional string 2" },
+            "opt4": { "type": "integer", "description": "Optional int 2" },
+            "opt5": { "type": "string", "description": "Optional string 3" }
+        },
+        "required": ["req1", "req2"]
+    })",
+};
 
-const common_chat_msg message_user {
+static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
+                                            python_tool, html_tool, todo_list };
+
+const common_chat_msg message_user{
     "user",
     "Hey there!",
     /* .content_parts = */ {},
@@ -513,89 +800,217 @@ const common_chat_msg message_user {
     /* .tool_call_id = */ "",
 };
 
-const common_chat_msg message_user_parts {
+const common_chat_msg message_user_parts{
     "user",
     /* .content = */ "",
-    /* .content_parts = */ {
-        { "text", "Hey" },
-        { "text", "there" },
-    },
-    /* .tool_calls = */ {},
-    /* .reasoning_content = */ "",
+    /* .content_parts = */
+    {
+     { "text", "Hey" },
+     { "text", "there" },
+     },
+    /* .tool_calls = */
+    {                 },
+    /* .reasoning_content = */
+    "",
     /* .tool_name = */ "",
     /* .tool_call_id = */ "",
 };
 
-const common_chat_msg message_assist                              = simple_assist_msg("Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_empty                        = simple_assist_msg("");
-const common_chat_msg message_assist_thoughts_unparsed_deepseek   = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_md         = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
-const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
-
-const common_chat_msg message_assist_thoughts_unparsed_r7b       = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_unparsed_magistral = simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
-const common_chat_msg message_assist_thoughts                    = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
-const common_chat_msg message_assist_thoughts_unopened_unparsed  = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
-const common_chat_msg message_assist_thoughts_no_content         = simple_assist_msg("", "I'm\nthinking");
-const common_chat_msg message_assist_call                        = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_noopt                  = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_withopt                = simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
-const common_chat_msg message_assist_call_content                = simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_empty_args             = simple_assist_msg("", "", "special_function");
-const common_chat_msg message_assist_call_cutoff_args            = simple_assist_msg("", "", "special_function", "{\"arg");
-const common_chat_msg message_assist_call_thoughts               = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
-const common_chat_msg message_assist_call_thoughts_unparsed      = simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_thoughts_content       = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
-const common_chat_msg message_assist_call_id                     = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
-const common_chat_msg message_assist_call_idx                    = simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
-const common_chat_msg message_assist_thoughts_call_idx           = simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
-const common_chat_msg message_assist_call_python                 = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
-const common_chat_msg message_assist_call_python_lines           = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
-const common_chat_msg message_assist_call_python_lines_unclosed  = simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
-const common_chat_msg message_assist_call_code_interpreter       = simple_assist_msg("", "", "code_interpreter", "{\"code\":\"print('hey')\"}");
+static common_chat_msg simple_assist_msg(const std::string & content,
+                                         const std::string & reasoning_content = "",
+                                         const std::string & tool_name         = "",
+                                         const std::string & arguments         = "",
+                                         const std::string & id                = "") {
+    common_chat_msg msg;
+    msg.role              = "assistant";
+    msg.content           = content;
+    msg.reasoning_content = reasoning_content;
+    if (!tool_name.empty() || !id.empty()) {
+        msg.tool_calls.push_back({ tool_name, arguments, id });
+    }
+    return msg;
+}
+
+static common_chat_msg message_with_tool_calls(const std::string & tool_name, const std::string & arguments) {
+    return simple_assist_msg("", "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_tool_calls_and_reasoning(const std::string & tool_name,
+                                                             const std::string & arguments,
+                                                             const std::string & reasoning) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_content_and_multiple_tool_calls(
+    const std::string &                                      reasoning,
+    const std::string &                                      content,
+    const std::vector<std::pair<std::string, std::string>> & tool_calls) {
+    common_chat_msg msg;
+    msg.role              = "assistant";
+    msg.content           = content;
+    msg.reasoning_content = reasoning;
+    for (const auto & [name, args] : tool_calls) {
+        msg.tool_calls.push_back({ name, args, "" });
+    }
+    return msg;
+}
+
+static common_chat_msg message_with_content_and_tool_call(const std::string & content,
+                                                          const std::string & tool_name,
+                                                          const std::string & arguments) {
+    return simple_assist_msg(content, "", tool_name, arguments);
+}
+
+static common_chat_msg message_with_reasoning_and_tool_call(const std::string & reasoning,
+                                                            const std::string & tool_name,
+                                                            const std::string & arguments) {
+    return simple_assist_msg("", reasoning, tool_name, arguments);
+}
+
+const common_chat_msg message_assist       = simple_assist_msg("Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_empty = simple_assist_msg("");
+const common_chat_msg message_assist_thoughts_unparsed_deepseek =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_md =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
+const common_chat_msg message_assist_thoughts_unparsed_md_partial =
+    simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
+
+const common_chat_msg message_assist_thoughts_unparsed_r7b =
+    simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_unparsed_magistral =
+    simple_assist_msg("[THINK]raisonnement[/THINK]Réponse");
+const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
+const common_chat_msg message_assist_thoughts_unopened_unparsed =
+    simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
+const common_chat_msg message_assist_thoughts_no_content = simple_assist_msg("", "I'm\nthinking");
+const common_chat_msg message_assist_call = simple_assist_msg("", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_noopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_withopt =
+    simple_assist_msg("", "", "special_function_with_opt", "{\"arg1\": 1, \"arg2\": 2}");
+const common_chat_msg message_assist_call_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_empty_args  = simple_assist_msg("", "", "special_function");
+const common_chat_msg message_assist_call_cutoff_args = simple_assist_msg("", "", "special_function", "{\"arg");
+const common_chat_msg message_assist_call_thoughts =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\":1}");
+const common_chat_msg message_assist_call_thoughts_unparsed =
+    simple_assist_msg("<think>I'm\nthinking</think>\n\n", "", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_thoughts_content =
+    simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}");
+const common_chat_msg message_assist_call_id =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "123456789");
+const common_chat_msg message_assist_call_idx =
+    simple_assist_msg("", "", "special_function", "{\"arg1\":1}", /* .id = */ "0");
+const common_chat_msg message_assist_thoughts_call_idx =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}", /* id = */ "0");
+const common_chat_msg message_assist_thoughts_partial_call =
+    simple_assist_msg("", "I'm\nthinking", "special_function", "", /* id = */ "0");
+const common_chat_msg message_assist_call_python = simple_assist_msg("", "", "python", "{\"code\":\"print('hey')\"}");
+const common_chat_msg message_assist_call_python_lines =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')\"}");
+const common_chat_msg message_assist_call_python_lines_unclosed =
+    simple_assist_msg("", "", "python", "{\"code\":\"# This is a program:\\nprint('hey')");
+const common_chat_msg message_assist_json_content =
+    simple_assist_msg("{\n  \"response\": \"Hello, world!\\nWhat's up?\"\n}");
 
 // Use for PEG parser implementations
 struct peg_test_case {
     common_chat_templates_inputs params;
-    std::string input;
-    common_chat_msg expect;
+    std::string                  input;
+    common_chat_msg              expect;
+    bool                         is_partial            = false;
+    bool                         expect_reconstruction = false;
 };
 
 struct make_peg_parser {
     common_chat_params params_;
-    common_peg_arena arena_;
-
-    make_peg_parser(common_chat_templates * tmpls, const common_chat_templates_inputs & inputs) {
-        params_ = common_chat_templates_apply(tmpls, inputs);
+    common_peg_arena   arena_;
+    bool               detailed_debug_;
+
+    make_peg_parser(common_chat_templates *              tmpls,
+                    const common_chat_templates_inputs & inputs,
+                    bool                                 detailed_debug = false) {
+        detailed_debug_ = detailed_debug;
+        params_         = common_chat_templates_apply(tmpls, inputs);
         arena_.load(params_.parser);
     }
 
-    common_chat_msg parse(const std::string & msg, bool is_partial) {
-        common_chat_parser_params parser_params;
-        parser_params.format = params_.format;
+    common_chat_msg parse(const std::string & msg, bool is_partial) const {
+        common_chat_parser_params parser_params(params_);
+        parser_params.debug = detailed_debug_;
         return common_chat_peg_parse(arena_, msg, is_partial, parser_params);
     }
 };
 
-static void test_peg_parser(common_chat_templates * tmpls, const std::function<void(peg_test_case &)> & init) {
+// Global template filter for --template flag
+static std::string g_template_filter;
+
+// When true, run reconstruction test on every non-partial test and report results
+static bool g_force_reconstruction_test = false;
+
+static void test_peg_parser(common_chat_templates *                      tmpls,
+                            const std::function<void(peg_test_case &)> & init,
+                            bool                                         detailed_debug) {
+    // UTF-8-safe truncation helper (same as in test_parser_with_streaming)
+    constexpr auto utf8_truncate_safe_len = [](const std::string_view s) -> size_t {
+        auto len = s.size();
+        if (len == 0) {
+            return 0;
+        }
+        auto i = len;
+        for (size_t back = 0; back < 4 && i > 0; ++back) {
+            --i;
+            unsigned char c = s[i];
+            if ((c & 0x80) == 0) {
+                return len;
+            }
+            if ((c & 0xC0) == 0xC0) {
+                size_t expected_len = 0;
+                if ((c & 0xE0) == 0xC0) {
+                    expected_len = 2;
+                } else if ((c & 0xF0) == 0xE0) {
+                    expected_len = 3;
+                } else if ((c & 0xF8) == 0xF0) {
+                    expected_len = 4;
+                } else {
+                    return i;
+                }
+                if (len - i >= expected_len) {
+                    return len;
+                }
+                return i;
+            }
+        }
+        return len - std::min(len, size_t(3));
+    };
+
     peg_test_case tc;
     init(tc);
     if (tc.params.messages.empty()) {
-        tc.params.messages = {message_user};
+        tc.params.messages = { message_user };
     }
     if (tc.expect.role.empty()) {
         tc.expect.role = "assistant";
     }
 
-    auto parser = make_peg_parser(tmpls, tc.params);
+    auto parser = make_peg_parser(tmpls, tc.params, detailed_debug);
+    if (detailed_debug) {
+        LOG_DBG("Using parser: \n%s\n", parser.arena_.dump(parser.arena_.root()).c_str());
+        LOG_DBG("Generation prompt: '%s'\n", parser.params_.generation_prompt.c_str());
+    }
 
     common_chat_msg msg_accum;
     common_chat_msg msg_prev;
     msg_accum.role = msg_prev.role = "assistant";
 
     for (size_t i = 1; i <= tc.input.size(); ++i) {
-        auto is_partial = i < tc.input.size();
-        common_chat_msg msg_current = parser.parse(tc.input.substr(0, i), is_partial);
+        auto            is_partial  = i < tc.input.size() || tc.is_partial;
+        // Use UTF-8 safe truncation to avoid corrupting multi-byte characters
+        size_t          safe_len    = utf8_truncate_safe_len(std::string_view(tc.input).substr(0, i));
+        std::string     prefix      = tc.input.substr(0, safe_len);
+        common_chat_msg msg_current = parser.parse(prefix, is_partial);
 
         for (const auto & diff : common_chat_msg_diff::compute_diffs(msg_prev, msg_current)) {
             if (!diff.reasoning_content_delta.empty()) {
@@ -605,24 +1020,390 @@ static void test_peg_parser(common_chat_templates * tmpls, const std::function<v
                 msg_accum.content += diff.content_delta;
             }
             if (diff.tool_call_index != std::string::npos) {
+                // During partial parsing, a new tool call may appear with empty name initially
+                // The name gets filled in as more input is parsed
+                while (msg_accum.tool_calls.size() <= diff.tool_call_index) {
+                    msg_accum.tool_calls.push_back({ "", "", "" });
+                }
+                // Always update name and id from diff (may change during incremental parsing), but only if the delta
+                // actually contains them
                 if (!diff.tool_call_delta.name.empty()) {
-                    msg_accum.tool_calls.push_back({diff.tool_call_delta.name, "", diff.tool_call_delta.id});
+                    msg_accum.tool_calls[diff.tool_call_index].name = diff.tool_call_delta.name;
+                }
+                if (!diff.tool_call_delta.id.empty()) {
+                    msg_accum.tool_calls[diff.tool_call_index].id = diff.tool_call_delta.id;
                 }
                 if (!diff.tool_call_delta.arguments.empty()) {
-                    msg_accum.tool_calls.back().arguments += diff.tool_call_delta.arguments;
+                    msg_accum.tool_calls[diff.tool_call_index].arguments += diff.tool_call_delta.arguments;
                 }
             }
         }
-        assert_msg_equals(msg_current, msg_accum, true);
+        try {
+            assert_msg_equals(msg_current, msg_accum, true);
+        } catch (std::exception & e) {
+            throw std::runtime_error((std::string("Error comparing accumulated message to current: ") + e.what()).c_str());
+        }
+
         msg_prev = msg_current;
     }
 
-    assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    if (!tc.is_partial) {
+        assert_msg_equals(tc.expect, parser.parse(tc.input, false), true);
+    }
     assert_msg_equals(tc.expect, msg_accum, true);
+
+    // Test grammar if present in params
+    if (!parser.params_.grammar.empty()) {
+        auto grammar = build_grammar(parser.params_.grammar);
+        if (!grammar) {
+            throw std::runtime_error("Failed to build grammar: " + parser.params_.grammar);
+        }
+
+        // In production, grammar triggers match against the full generated text
+        // including the generation prompt. All positions are in full_input coordinates.
+        const auto & gen_prompt = parser.params_.generation_prompt;
+        std::string full_input = gen_prompt + tc.input;
+
+        // Determine whether the reasoning-budget sampler path applies: tool-call grammar
+        // with all WORD triggers and thinking tags present. In production, the reasoning
+        // budget sampler inhibits grammar application while inside thinking blocks —
+        // triggers inside <think>...</think> are suppressed.
+        bool use_reasoning_budget_path = false;
+        if (parser.params_.grammar_lazy && !parser.params_.thinking_end_tag.empty()) {
+            use_reasoning_budget_path = true;
+            for (const auto & trigger : parser.params_.grammar_triggers) {
+                if (trigger.type != COMMON_GRAMMAR_TRIGGER_TYPE_WORD) {
+                    use_reasoning_budget_path = false;
+                    break;
+                }
+            }
+        }
+
+        // Find the earliest trigger position to determine the constrained portion
+        auto earliest_trigger_pos = std::string::npos;
+
+        if (use_reasoning_budget_path) {
+            // Reasoning-budget path: simulate thinking-aware trigger detection.
+            // Walk through full_input tracking thinking state; only match triggers
+            // when outside thinking blocks.
+            const auto & think_start = parser.params_.thinking_start_tag;
+            const auto & think_end   = parser.params_.thinking_end_tag;
+
+            bool in_thinking = false;
+            for (size_t i = 0; i < full_input.size(); ++i) {
+                if (!in_thinking && !think_start.empty()
+                        && full_input.compare(i, think_start.size(), think_start) == 0) {
+                    in_thinking = true;
+                    i += think_start.size() - 1;
+                    continue;
+                }
+                if (in_thinking && full_input.compare(i, think_end.size(), think_end) == 0) {
+                    in_thinking = false;
+                    i += think_end.size() - 1;
+                    continue;
+                }
+                if (in_thinking) {
+                    continue;
+                }
+                // Outside thinking — check if any trigger word starts here
+                for (const auto & trigger : parser.params_.grammar_triggers) {
+                    if (full_input.compare(i, trigger.value.size(), trigger.value) == 0) {
+                        if (earliest_trigger_pos == std::string::npos || i < earliest_trigger_pos) {
+                            earliest_trigger_pos = i;
+                        }
+                    }
+                }
+                if (earliest_trigger_pos != std::string::npos) {
+                    break;  // found the earliest
+                }
+            }
+
+            // If the reasoning-budget path found no trigger outside thinking but the test
+            // expects tool calls, this template nests tool calls inside thinking
+            // blocks (e.g. Kimi). Fall back to the legacy path for this case.
+            if (earliest_trigger_pos == std::string::npos && !tc.expect.tool_calls.empty()) {
+                use_reasoning_budget_path = false;
+            }
+        }
+
+        if (!use_reasoning_budget_path) {
+            // Legacy path: find triggers without thinking-awareness
+            for (const auto & trigger : parser.params_.grammar_triggers) {
+                size_t      pos = std::string::npos;
+                std::smatch match;
+                switch (trigger.type) {
+                    case COMMON_GRAMMAR_TRIGGER_TYPE_WORD:
+                        {
+                            const auto & word = trigger.value;
+                            pos               = full_input.find(word);
+                            break;
+                        }
+                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN:
+                        {
+                            const auto & compiled = std::regex(trigger.value);
+                            if (std::regex_search(full_input, match, compiled)) {
+                                pos = match.position(compiled.mark_count());
+                            }
+                            break;
+                        }
+                    case COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL:
+                        {
+                            // In production, PATTERN_FULL triggers are checked against
+                            // the text generated so far, growing token by token. Simulate
+                            // by trying every prefix of full_input.
+                            const auto & compiled = std::regex(trigger.value);
+                            for (size_t end = gen_prompt.size(); end <= full_input.size(); ++end) {
+                                std::string prefix = full_input.substr(0, end);
+                                if (std::regex_match(prefix, match, compiled)) {
+                                    pos = std::string::npos;
+                                    for (size_t gi = 1; gi < match.size(); ++gi) {
+                                        if (match[gi].length() > 0) {
+                                            pos = match.position(gi);
+                                            break;
+                                        }
+                                    }
+                                    if (pos == std::string::npos) {
+                                        pos = match.position(0);
+                                    }
+                                    break;
+                                }
+                            }
+                            break;
+                        }
+                    default:
+                        throw std::runtime_error("Unknown trigger type");
+                }
+                if (pos != std::string::npos) {
+                    if (earliest_trigger_pos == std::string::npos || pos < earliest_trigger_pos) {
+                        earliest_trigger_pos = pos;
+                    }
+                }
+            }
+        }
+
+        // If the test expects tool calls and the grammar is lazy, the trigger must fire.
+        // Otherwise the grammar would never activate in production and tool calls wouldn't
+        // be constrained. A silent skip here would hide broken triggers.
+        if (parser.params_.grammar_lazy && !tc.expect.tool_calls.empty() && !tc.is_partial
+                && earliest_trigger_pos == std::string::npos) {
+            std::string trigger_desc;
+            for (const auto & trigger : parser.params_.grammar_triggers) {
+                trigger_desc += "\n  [type=" + std::to_string(trigger.type) + "] " + trigger.value;
+            }
+            throw std::runtime_error(
+                "Grammar trigger did not fire, but test expects tool calls (lazy grammar).\n"
+                ">>> Input: " + full_input + "\n"
+                ">>> Triggers (" + std::to_string(parser.params_.grammar_triggers.size()) + "):" + trigger_desc);
+        }
+
+        // Determine the constrained portion of input to test against grammar.
+        // If the trigger position falls inside the generation prompt, the grammar
+        // sampler was already active before model output began — constrain from the
+        // start of the model output (i.e. tc.input).
+        std::string constrained = full_input;
+        bool grammar_triggered = false;
+        if (earliest_trigger_pos != std::string::npos) {
+            auto constrain_from = std::max(earliest_trigger_pos, gen_prompt.size());
+            constrained = full_input.substr(constrain_from);
+            grammar_triggered = true;
+        } else if (!parser.params_.grammar_lazy) {
+            // For non-lazy grammars, the entire input should match
+            grammar_triggered = true;
+        }
+
+        // Test the constrained portion against the grammar
+        if (grammar_triggered && !tc.is_partial) {
+            auto result = match_string_detailed(constrained, grammar.get());
+            if (!result.success) {
+                std::string error_msg;
+                if (result.incomplete) {
+                    error_msg =
+                        "Grammar matched all input but expects more:\n\n"
+                        ">>> Input: " + tc.input +
+                        "\n\n>>> Constrained: " + constrained +
+                        "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+                        std::to_string(result.matched_codepoints) + " codepoints): " +
+                        (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+                        "\n\n>>> Expected next: " + result.expected_description +
+                        "\n\n>>> Grammar: " + parser.params_.grammar;
+                } else {
+                    error_msg =
+                        "Grammar match failed:\n\n"
+                        ">>> Input: " + tc.input +
+                        "\n\n>>> Constrained: " + constrained +
+                        "\n\n>>> Matched prefix (" + std::to_string(result.matched_bytes) + " bytes, " +
+                        std::to_string(result.matched_codepoints) + " codepoints): " +
+                        (result.matched_prefix.size() > 100 ? result.matched_prefix.substr(0, 100) + "..." : result.matched_prefix) +
+                        "\n\n>>> Failing character: " + result.failing_char +
+                        "\n\n>>> Expected: " + result.expected_description +
+                        "\n\n>>> Grammar: " + parser.params_.grammar;
+                }
+                throw std::runtime_error(error_msg);
+            }
+        }
+    }
+
+    // Reconstruction test: verify that appending the parsed message to the original
+    // messages and re-rendering the template (without generation prompt) reproduces
+    // the original prompt + input exactly, or as a proper prefix (the template may
+    // append end-of-turn tokens after the assistant message).
+    if ((tc.expect_reconstruction || g_force_reconstruction_test) && !tc.is_partial) {
+        // Start from tc.expect but copy tool call arguments from the actual parser
+        // output, which preserves original JSON formatting (e.g. {"arg1":1} vs {"arg1": 1}).
+        auto reconstruction_msg = tc.expect;
+        auto parsed_msg         = parser.parse(tc.input, false);
+        for (size_t i = 0; i < reconstruction_msg.tool_calls.size() && i < parsed_msg.tool_calls.size(); i++) {
+            reconstruction_msg.tool_calls[i].arguments = parsed_msg.tool_calls[i].arguments;
+        }
+        common_chat_templates_inputs reconstruction_inputs = tc.params;
+        reconstruction_inputs.messages.push_back(reconstruction_msg);
+        reconstruction_inputs.add_generation_prompt = false;
+
+        auto reconstruction_params = common_chat_templates_apply(tmpls, reconstruction_inputs);
+        std::string expected_text  = parser.params_.prompt + tc.input;
+        bool match = reconstruction_params.prompt == expected_text ||
+            (reconstruction_params.prompt.size() > expected_text.size() &&
+             reconstruction_params.prompt.compare(0, expected_text.size(), expected_text) == 0);
+        if (!match && g_force_reconstruction_test && !tc.expect_reconstruction) {
+            // In forced mode, report mismatch but don't fail
+            // Find the first difference position
+            size_t diff_pos = 0;
+            size_t min_len  = std::min(expected_text.size(), reconstruction_params.prompt.size());
+            while (diff_pos < min_len && expected_text[diff_pos] == reconstruction_params.prompt[diff_pos]) {
+                diff_pos++;
+            }
+            size_t ctx_start = diff_pos > 60 ? diff_pos - 60 : 0;
+            size_t ctx_end_e = std::min(expected_text.size(), diff_pos + 40);
+            size_t ctx_end_r = std::min(reconstruction_params.prompt.size(), diff_pos + 40);
+            LOG_ERR("\x1b[31m[RECONSTRUCTION FAIL]\x1b[0m "
+                    "first diff at byte %zu (expected len=%zu, reconstructed len=%zu)\n"
+                    "  expected:      ...%s...\n"
+                    "  reconstructed: ...%s...\n",
+                    diff_pos, expected_text.size(), reconstruction_params.prompt.size(),
+                    expected_text.substr(ctx_start, ctx_end_e - ctx_start).c_str(),
+                    reconstruction_params.prompt.substr(ctx_start, ctx_end_r - ctx_start).c_str());
+        } else if (!match) {
+            std::string error_msg =
+                "Reconstruction mismatch:\n\n"
+                ">>> Expected (prompt + input):\n" + expected_text +
+                "\n\n>>> Reconstructed:\n" + reconstruction_params.prompt;
+            throw std::runtime_error(error_msg);
+        } else if (g_force_reconstruction_test) {
+            LOG_INF("\x1b[32m[RECONSTRUCTION OK]\x1b[0m\n");
+        }
+    }
+}
+
+// Fluent builder for PEG parser tests
+class peg_test_builder;
+
+class peg_tester {
+    common_chat_templates_ptr tmpls_;
+    std::string               template_path_;
+    bool                      detailed_debug_;
+    friend class peg_test_builder;
+
+  public:
+    explicit peg_tester(const std::string & template_path, const bool detailed_debug = false) :
+        tmpls_(read_templates(template_path)),
+        template_path_(template_path),
+        detailed_debug_(detailed_debug) {}
+
+    const std::string & template_path() const { return template_path_; }
+
+    peg_test_builder test(const std::string & input);
+};
+
+class peg_test_builder {
+    peg_tester &  tester_;
+    peg_test_case tc_;
+
+  public:
+    peg_test_builder(peg_tester & tester, const std::string & input) : tester_(tester) { tc_.input = input; }
+
+    // Parameter setters
+    peg_test_builder & reasoning_format(common_reasoning_format fmt) {
+        tc_.params.reasoning_format = fmt;
+        return *this;
+    }
+
+    peg_test_builder & tools(std::vector<common_chat_tool> tools) {
+        tc_.params.tools = std::move(tools);
+        return *this;
+    }
+
+    peg_test_builder & enable_thinking(bool val) {
+        tc_.params.enable_thinking = val;
+        return *this;
+    }
+
+    peg_test_builder & parallel_tool_calls(bool val) {
+        tc_.params.parallel_tool_calls = val;
+        return *this;
+    }
+
+    peg_test_builder & json_schema(const std::string & schema) {
+        tc_.params.json_schema = schema;
+        return *this;
+    }
+
+    peg_test_builder & is_partial(bool val) {
+        tc_.is_partial = val;
+        return *this;
+    }
+
+    peg_test_builder & expect_reconstruction(bool val = true) {
+        tc_.expect_reconstruction = val;
+        return *this;
+    }
+
+    // Expect setters
+    peg_test_builder & expect(const common_chat_msg & msg) {
+        tc_.expect = msg;
+        return *this;
+    }
+
+    peg_test_builder & expect_content(const std::string & content) {
+        tc_.expect.content = content;
+        return *this;
+    }
+
+    peg_test_builder & expect_reasoning(const std::string & reasoning) {
+        tc_.expect.reasoning_content = reasoning;
+        return *this;
+    }
+
+    peg_test_builder & expect_tool_calls(std::vector<common_chat_tool_call> calls) {
+        tc_.expect.tool_calls = std::move(calls);
+        return *this;
+    }
+
+    // Execute the test
+    void run() {
+        // Check template filter
+        if (!g_template_filter.empty()) {
+            // Case-insensitive substring match
+            std::string template_path_lower = tester_.template_path();
+            std::string filter_lower        = g_template_filter;
+            std::transform(template_path_lower.begin(), template_path_lower.end(), template_path_lower.begin(),
+                           ::tolower);
+            std::transform(filter_lower.begin(), filter_lower.end(), filter_lower.begin(), ::tolower);
+            if (template_path_lower.find(filter_lower) == std::string::npos) {
+                // Skip this test
+                return;
+            }
+        }
+        LOG_INF("\n\x1b[38;5;126m[%s]\x1b[0m\n%s\n\n", tester_.template_path().c_str(), tc_.input.c_str());
+        test_peg_parser(tester_.tmpls_.get(), [this](peg_test_case & t) { t = tc_; }, tester_.detailed_debug_);
+    }
+};
+
+peg_test_builder peg_tester::test(const std::string & input) {
+    return peg_test_builder(*this, input);
 }
 
 static void test_msgs_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_msg> msgs{
         message_user,
         message_user_parts,
@@ -633,54 +1414,48 @@ static void test_msgs_oaicompat_json_conversion() {
         message_assist_call_id,
         message_assist_call_idx,
         message_assist_call_python,
-        message_assist_call_code_interpreter,
     };
     for (const auto & msg : msgs) {
-        auto oai_json = common_chat_msgs_to_json_oaicompat({msg});
-        auto msgs2 = common_chat_msgs_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_msgs_to_json_oaicompat({ msg });
+        auto msgs2    = common_chat_msgs_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, msgs2.size());
-        auto msg2 = msgs2[0];
+        const auto & msg2 = msgs2[0];
         assert_msg_equals(msg, msg2);
     }
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"user\",\n"
-            "    \"content\": [\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"Hey\"\n"
-            "      },\n"
-            "      {\n"
-            "        \"type\": \"text\",\n"
-            "        \"text\": \"there\"\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_user_parts}).dump(2));
-
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"role\": \"assistant\",\n"
-            "    \"content\": \"\",\n"
-            "    \"tool_calls\": [\n"
-            "      {\n"
-            "        \"type\": \"function\",\n"
-            "        \"function\": {\n"
-            "          \"name\": \"python\",\n"
-            "          \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
-            "        }\n"
-            "      }\n"
-            "    ]\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_msgs_to_json_oaicompat({message_assist_call_python}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"user\",\n"
+                              "    \"content\": [\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"Hey\"\n"
+                              "      },\n"
+                              "      {\n"
+                              "        \"type\": \"text\",\n"
+                              "        \"text\": \"there\"\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_user_parts }).dump(2));
+
+    // Note: content is "" instead of null due to workaround for templates that render null as "None"
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"role\": \"assistant\",\n"
+                              "    \"content\": \"\",\n"
+                              "    \"tool_calls\": [\n"
+                              "      {\n"
+                              "        \"type\": \"function\",\n"
+                              "        \"function\": {\n"
+                              "          \"name\": \"python\",\n"
+                              "          \"arguments\": \"{\\\"code\\\":\\\"print('hey')\\\"}\"\n"
+                              "        }\n"
+                              "      }\n"
+                              "    ]\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_msgs_to_json_oaicompat({ message_assist_call_python }).dump(2));
 
     auto res = common_chat_msgs_parse_oaicompat(json::parse("[{\"role\": \"assistant\", \"tool_calls\": []}]"));
     assert_equals<size_t>(1, res.size());
@@ -699,16 +1474,15 @@ static void test_msgs_oaicompat_json_conversion() {
 }
 
 static void test_tools_oaicompat_json_conversion() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     std::vector<common_chat_tool> tools{
         special_function_tool,
         python_tool,
-        code_interpreter_tool,
     };
 
     for (const auto & tool : tools) {
-        auto oai_json = common_chat_tools_to_json_oaicompat({tool});
-        auto tools2 = common_chat_tools_parse_oaicompat(oai_json);
+        auto oai_json = common_chat_tools_to_json_oaicompat({ tool });
+        auto tools2   = common_chat_tools_parse_oaicompat(oai_json);
         assert_equals((size_t) 1, tools2.size());
         auto tool2 = tools2[0];
         assert_equals(tool.name, tool2.name);
@@ -716,3040 +1490,2531 @@ static void test_tools_oaicompat_json_conversion() {
         assert_equals(json::parse(tool.parameters).dump(2), json::parse(tool2.parameters).dump(2));
     }
 
-    assert_equals(
-        std::string(
-            "[\n"
-            "  {\n"
-            "    \"type\": \"function\",\n"
-            "    \"function\": {\n"
-            "      \"name\": \"special_function\",\n"
-            "      \"description\": \"I'm special\",\n"
-            "      \"parameters\": {\n"
-            "        \"type\": \"object\",\n"
-            "        \"properties\": {\n"
-            "          \"arg1\": {\n"
-            "            \"type\": \"integer\",\n"
-            "            \"description\": \"The arg.\"\n"
-            "          }\n"
-            "        },\n"
-            "        \"required\": [\n"
-            "          \"arg1\"\n"
-            "        ]\n"
-            "      }\n"
-            "    }\n"
-            "  }\n"
-            "]"
-        ),
-        common_chat_tools_to_json_oaicompat({special_function_tool}).dump(2));
+    assert_equals(std::string("[\n"
+                              "  {\n"
+                              "    \"type\": \"function\",\n"
+                              "    \"function\": {\n"
+                              "      \"name\": \"special_function\",\n"
+                              "      \"description\": \"I'm special\",\n"
+                              "      \"parameters\": {\n"
+                              "        \"type\": \"object\",\n"
+                              "        \"properties\": {\n"
+                              "          \"arg1\": {\n"
+                              "            \"type\": \"integer\",\n"
+                              "            \"description\": \"The arg.\"\n"
+                              "          }\n"
+                              "        },\n"
+                              "        \"required\": [\n"
+                              "          \"arg1\"\n"
+                              "        ]\n"
+                              "      }\n"
+                              "    }\n"
+                              "  }\n"
+                              "]"),
+                  common_chat_tools_to_json_oaicompat({ special_function_tool }).dump(2));
+}
+
+static void test_template_output_peg_parsers(bool detailed_debug) {
+    LOG_DBG("%s\n", __func__);
+
+    // JSON schemas
+    const char * invoice_schema = R"({
+        "type": "object",
+        "properties": {
+            "amount": {"type": "number"},
+            "date": {"type": "string"}
+        }
+    })";
 
     {
-        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
-        assert_equals((size_t) 1, tools_no_params.size());
-        assert_equals(std::string("test_func"), tools_no_params[0].name);
-        assert_equals(std::string("A test"), tools_no_params[0].description);
-        assert_equals(std::string("{}"), tools_no_params[0].parameters);
-    }
-    {
-        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
-        assert_equals((size_t) 1, tools_no_desc.size());
-        assert_equals(std::string("test_func"), tools_no_desc[0].name);
-        assert_equals(std::string(""), tools_no_desc[0].description);
+        // Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
+        auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
+
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .expect(message_assist_thoughts)
+            .run();
+
+                tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("<think>\nI'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "I'm\nthinking\n</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "<parameter=arg2>\n2\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        tst.test(
+               "I need to output the invoice details in JSON\n"
+               "</think>\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
+
+        // tool call segment in reasoning
+        tst.test(
+               "Let's call a tool: <tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Not the real call!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call></think>\n"
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>"
+            )
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_reasoning("Let's call a tool: <tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Not the real call!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        // No args tool
+        tst.test(
+               "<tool_call>\n"
+               "<function=empty_args>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ empty_args_tool })
+            .expect(message_with_tool_calls("empty_args", "{}"))
+            .run();
+
+        // No args tool with no properties defined
+        tst.test(
+               "<tool_call>\n"
+               "<function=empty_args_no_props>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ empty_args_tool_no_properties })
+            .expect(message_with_tool_calls("empty_args_no_props", "{}"))
+            .run();
     }
+
     {
-        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func"}}])"));
-        assert_equals((size_t) 1, tools_minimal.size());
-        assert_equals(std::string("test_func"), tools_minimal[0].name);
-        assert_equals(std::string(""), tools_minimal[0].description);
-        assert_equals(std::string("{}"), tools_minimal[0].parameters);
+        // Ministral-3-14B-Reasoning-2512
+        auto tst = peg_tester("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .expect_content("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .expect_reconstruction()
+            .run();
+
+        tst.test("[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .expect(message_assist_thoughts)
+            .expect_reconstruction()
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "[THINK]I'm\nthinking[/THINK]"
+               R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
+                 R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        tst.test(
+               "[THINK]I need to output the invoice details in JSON[/THINK]"
+               "```json\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})"
+               "\n```")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
+
+        // fake tool call marker in reasoning
+        tst.test(
+               "[THINK]Let me think about [TOOL_CALLS]special_function[ARGS]{\"arg1\":1} and more[/THINK]"
+               R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .tools({ special_function_tool })
+            .expect_reasoning("Let me think about [TOOL_CALLS]special_function[ARGS]{\"arg1\":1} and more")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+            })
+            .expect_reconstruction()
+            .run();
     }
-}
 
-// for compat; ref: https://github.com/ggml-org/llama.cpp/pull/18961
-struct test_parser_params {
-    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
-    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
-    bool                     reasoning_in_content  = false;
-    bool                     thinking_forced_open  = false;
-    bool                     parse_tool_calls      = true;
-};
+    {
+        // NVIDIA Nemotron-3 Nano
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").
+            enable_thinking(false).
+            reasoning_format(COMMON_REASONING_FORMAT_AUTO).
+            expect(message_assist).run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("<think>\nI'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .run();
+
+        tst.test("I'm\nthinking\n</think>\nHello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "I'm\nthinking\n</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n1\n</parameter>\n"
+               "<parameter=arg2>\n2\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
+
+        tst.test(
+               "I need to output the invoice details in JSON\n"
+               "</think>\n"
+               R"({"amount": 123.45, "date": "2025-12-03"})")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
+
+        // tool call segment in reasoning
+        tst.test(
+               "Let's call a tool: <tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Not the real call!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call></think>\n"
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>"
+            )
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                python_tool
+        })
+            .expect_reasoning("Let's call a tool: <tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Not the real call!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .run();
 
-static common_chat_msg test_chat_parse(const std::string & input, bool is_partial, const test_parser_params & syntax) {
-    common_chat_parser_params params;
-    params.format               = syntax.format;
-    params.reasoning_format     = syntax.reasoning_format;
-    params.reasoning_in_content = syntax.reasoning_in_content;
-    params.thinking_forced_open = syntax.thinking_forced_open;
-    params.parse_tool_calls     = syntax.parse_tool_calls;
-    return common_chat_parse(input, is_partial, params);
-}
+    }
 
-static void test_template_output_parsers() {
-    printf("[%s]\n", __func__);
+    {
+        // CohereForAI Command-R 7B (2024-tool_use)
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>")
+            .expect(message_assist_thoughts_unparsed_r7b)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", ")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(message_assist_thoughts_partial_call)
+            .run();
+
+        tst.test(
+               "<|START_THINKING|><|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_idx)
+            .run();
+    }
 
-    common_chat_templates_inputs inputs_no_tools;
-    inputs_no_tools.messages                = {message_user};
+    {
+        // Google Gemma 2 2B - does not support tool calling
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja");
 
-    common_chat_templates_inputs inputs_tools;
-    inputs_tools.messages                   = {message_user};
-    inputs_tools.tools                      = {special_function_tool};
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).expect_reconstruction().run();
 
-    common_chat_templates_inputs inputs_tools_builtin;
-    inputs_tools_builtin.messages           = {message_user};
-    inputs_tools_builtin.tools              = {python_tool};
+        tst.test("Line 1\nLine 2\nLine 3").expect(simple_assist_msg("Line 1\nLine 2\nLine 3")).expect_reconstruction().run();
+    }
 
     {
-        // Not supported yet
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+        // Google Gemma 4 (tool calling with Gemma4 dict format)
+        auto tst = peg_tester("models/templates/google-gemma-4-31B-it.jinja");
+
+        tst.test("Hello, world!").expect(simple_assist_msg("Hello, world!")).run();
+
+        // Reasoning and content
+        tst.test(
+                "<|channel>thought\nI'm\nthinking<channel|>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Empty reasoning (budget=0: sampler forces end tag before newline)
+        tst.test(
+                "<|channel>thought<channel|>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(simple_assist_msg("Hello, world!\nWhat's up?", ""))
+            .run();
+
+        // Reasoning and content with reasoning_format = none
+        tst.test(
+                "<|channel>thought\nI'm\nthinking<channel|>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("<|channel>thought\nI'm\nthinking<channel|>Hello, world!\nWhat's up?")
+            .run();
+
+        // Simple tool call with string argument
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>London<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", R"({"city": "London"})"))
+            .run();
+
+        // Tool call with string argument containing special chars
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>San Francisco<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", R"({"city": "San Francisco"})"))
+            .run();
+
+        // Tool call with empty args
+        tst.test(
+                "<|tool_call>call:empty_args{}<tool_call|>")
+            .tools({ empty_args_tool })
+            .expect(message_with_tool_calls("empty_args", "{}"))
+            .run();
+
+        // Tool call with string and content
+        tst.test(
+                "Hello, world!\nWhat's up?<|tool_call>call:get_time{city:<|\"|>Paris<|\"|>}<tool_call|>")
+            .tools({ get_time_tool })
+            .expect(message_with_content_and_tool_call("Hello, world!\nWhat's up?", "get_time", R"({"city": "Paris"})"))
+            .run();
+
+        // Parallel tool calls
+        tst.test(
+                "<|tool_call>call:get_time{city:<|\"|>London<|\"|>}<tool_call|>"
+                "<|tool_call>call:get_weather{city:<|\"|>Paris<|\"|>}<tool_call|>")
+            .tools({ get_time_tool, get_weather_tool })
+            .parallel_tool_calls(true)
+            .expect_tool_calls({
+                { "get_time", R"({"city": "London"})", "" },
+                { "get_weather", R"({"city": "Paris"})", "" },
+            })
+            .run();
+
+        // Tool call with integer argument (number type)
+        tst.test(
+                "<|tool_call>call:special_function{arg1:42}<tool_call|>")
+            .tools({ special_function_tool })
+            .expect(message_with_tool_calls("special_function", R"({"arg1": 42})"))
+            .run();
+
+        // Tool call with negative number argument
+        tst.test(
+                "<|tool_call>call:special_function{arg1:-7}<tool_call|>")
+            .tools({ special_function_tool })
+            .expect(message_with_tool_calls("special_function", R"({"arg1": -7})"))
+            .run();
+
+        // Tool call with decimal number argument
+        tst.test(
+                "<|tool_call>call:amount{orig:3.14}<tool_call|>")
+            .tools({ amount_tool })
+            .expect(message_with_tool_calls("amount", R"({"orig": 3.14})"))
+            .run();
+
+        // Tool call with boolean argument (true)
+        tst.test(
+                "<|tool_call>call:toggle{enabled:true}<tool_call|>")
+            .tools({ toggle_tool })
+            .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
+            .run();
+
+        // Tool call with boolean argument (false)
+        tst.test(
+                "<|tool_call>call:toggle{enabled:false}<tool_call|>")
+            .tools({ toggle_tool })
+            .expect(message_with_tool_calls("toggle", R"({"enabled": false})"))
+            .run();
+
+        // Tool call with null argument
+        tst.test(
+                "<|tool_call>call:set_nullable{value:null}<tool_call|>")
+            .tools({ nullable_tool })
+            .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
+            .run();
+
+        // Tool call with array argument (todo list)
+        tst.test(
+                "<|tool_call>call:todo_list{todos:[<|\"|>buy milk<|\"|>,<|\"|>walk dog<|\"|>]}<tool_call|>")
+            .tools({ todo_list })
+            .expect(message_with_tool_calls("todo_list", R"({"todos":["buy milk","walk dog"]})"))
+            .run();
+
+        // Tool call with object/dict argument
+        tst.test(
+                "<|tool_call>call:set_config{config:{theme:<|\"|>dark<|\"|>,count:3}}<tool_call|>")
+            .tools({ config_tool })
+            .expect(message_with_tool_calls("set_config", R"({"config":{"theme":"dark","count":3}})"))
+            .run();
+
+        // Tool call with empty array
+        tst.test(
+                "<|tool_call>call:todo_list{todos:[]}<tool_call|>")
+            .tools({ todo_list })
+            .expect(message_with_tool_calls("todo_list", R"({"todos":[]})"))
+            .run();
+
+        // Tool call with empty dict
+        tst.test(
+                "<|tool_call>call:set_config{config:{}}<tool_call|>")
+            .tools({ config_tool })
+            .expect(message_with_tool_calls("set_config", R"({"config":{}})"))
+            .run();
+
+        // Tool call with scientific notation number
+        tst.test(
+                "<|tool_call>call:amount{orig:1.5e10}<tool_call|>")
+            .tools({ amount_tool })
+            .expect(message_with_tool_calls("amount", R"({"orig": 1.5e10})"))
+            .run();
+
+        // Edge cases
+        tst.test(
+                "<|channel>thought\n<channel|>Hello, world!\nWhat's up?<channel|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist)
+            .run();
+
+        tst.test(
+                "<|channel>thought\n<channel|>Hello, world!\nWhat's up?<|channel>thought\n<channel|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist)
+            .run();
+
+        tst.test(
+                "<|channel>thought\n<channel|>Hello, world!\nWhat's up?<|channel>thought\n<channel|><channel|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist)
+            .run();
+
+        tst.test(
+                "<|channel><|channel>thought\n<channel|>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist)
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja");
-        std::vector<std::string>   end_tokens{ "<|END_OF_TURN_TOKEN|>" };
+        // Qwen-QwQ-32B (reasoning model)
+        auto tst = peg_tester("models/templates/Qwen-QwQ-32B.jinja");
 
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_COMMAND_R7B, params.format);
-            assert_equals(false, params.thinking_forced_open);
-        }
+        // QwQ always has thinking forced open - input starts after the <think>\n in the prompt
+        tst.test("Let me think about this...\n</think>\nThe answer is 42.")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(simple_assist_msg("The answer is 42.", "Let me think about this..."))
+            .run();
 
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_r7b,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_COMMAND_R7B}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_call_idx,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                "]<|END_ACTION|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
-                "<|START_ACTION|>[\n"
-                "    {\"tool_call_id\": \"0\", \"tool_name\": \"special",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
-                      "<|START_THINKING|><|END_THINKING|>"
-                      "<|START_ACTION|>[\n"
-                      "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
-                      "]<|END_ACTION|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      COMMON_REASONING_FORMAT_DEEPSEEK);
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<|START_RESPONSE|>Hello, world!\n"
-                      "What's up?<|END_RESPONSE|>",
-                      /* expect_grammar_triggered= */ false);
-    }
-    // TODO @ngxson : generic tool calls is too costly to maintain, consider removing it in the future
+        tst.test("</think>Hello, world!").reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(simple_assist_msg("Hello, world!")).run();
+    }
     {
-        auto tmpls = read_templates("models/templates/google-gemma-2-2b-it.jinja");
-        std::vector<std::string>   end_tokens{ "<end_of_turn>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GENERIC,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/microsoft-Phi-3.5-mini-instruct.jinja").get(),
-                          inputs_tools)
-                          .format);
-
-        // Generic tool calls doesn't generate / parse content-only messages symmetrically.
-
-        assert_equals(
-            simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GENERIC,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"t",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            simple_assist_msg("", "", "puppeteer_screenshot", "{\"name\":\"servethehome_homepage\","),
-            test_chat_parse(
-                R"({"tool_call": {"name": "puppeteer_screenshot", "arguments": {"name": "servethehome_homepage",)",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-        assert_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "{ \"tool_call\" : { \"name\" : \"special_function\", \"arguments\" : { \"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "{\n"
-                "  \"response\": \"Hello, world!\\nWhat's up?\"\n"
-                "}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GENERIC}));
-#if 0
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}");
-#endif
+        // NousResearch-Hermes-2-Pro and Hermes-3 (tool calling models)
+        auto tst = peg_tester("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja", detailed_debug);
+
+        tst.test(
+               "<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "Hello, world!\nWhat's up?<tool_call>\n"
+               "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .run();
+
+        // Note: Hermes template doesn't support thinking/reasoning natively
+        // Note: We only support one tool calling format per template, no alternate formats
     }
     {
-        auto tmpls = read_templates("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja");
-        std::vector<std::string>   end_tokens{ "</s>" };
+        // Test simple content-only template
+        auto tst = peg_tester("models/templates/google-gemma-2-2b-it.jinja", detailed_debug);
 
-        assert_equals(COMMON_CHAT_FORMAT_MISTRAL_NEMO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(
-            tmpls.get(), end_tokens, message_assist_call_id, tools,
-            "[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]");
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
     }
     {
-        assert_msg_equals(
-            simple_assist_msg("Réponse", "raisonnement"),
-            test_chat_parse(
-                message_assist_thoughts_unparsed_magistral.content,
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MAGISTRAL,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
+        // IBM Granite (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // TODO: pending support for WRAPPED_WITH_REASONING
+        // tst.test("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>")
+        //     .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+        //     .expect(message_assist_thoughts)
+        //     .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
+        // ByteDance-Seed-OSS (reasoning and tool calling model)
+        auto tst = peg_tester("models/templates/ByteDance-Seed-OSS.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        tst.test("<seed:think>I'm thinking about the answer</seed:think>\nHello, world!")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("Hello, world!", "I'm thinking about the answer"))
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>\n"
+               "<seed:tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>1</parameter>\n"
+               "<parameter=arg2>2</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .run();
+
+        // tool call with inside quotes
+        tst.test(
+               "<seed:tool_call>\n"
+               "<function=edit>\n"
+               "<parameter=filename>\n"
+               "foo.cpp\n"
+               "</parameter>\n"
+               "<parameter=oldString>"
+               "def foo(arg = \"14\"):\n"
+               "    return arg + \"bar\"\n"
+               "\n"
+               "</parameter>\n"
+               "<parameter=newString>"
+               "def foo(arg = \"15\"):\n"
+               "    pass\n"
+               "\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</seed:tool_call>")
+            .tools({
+                edit_tool
+        })
+            .expect_tool_calls({
+                { "edit", "{\"filename\": \"foo.cpp\", "
+                    "\"oldString\": \"def foo(arg = \\\"14\\\"):\\n    return arg + \\\"bar\\\"\\n\", "
+                    "\"newString\": \"def foo(arg = \\\"15\\\"):\\n    pass\\n\"}", {}
+                }
+            })
+            .run();
 
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
     }
+
     {
-        auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja").get(),
-                inputs_tools)
-                .format);
-        assert_equals(
-            COMMON_CHAT_FORMAT_HERMES_2_PRO,
-            common_chat_templates_apply(
-                read_templates("models/templates/Qwen-Qwen2.5-7B-Instruct.jinja").get(),
-                inputs_tools)
-                .format);
-
-        // Test parsing
-        assert_msg_equals(
-            simple_assist_msg("", "", "python", ""),
-            test_chat_parse(
-                "```json\n"
-                "<function_call> { \"name\" : \"python\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name\"",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("Let's call something\n"),
-            test_chat_parse(
-                "Let's call something\n"
-                "<tool_call>{\"name",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                // QwQ-32B's template adds a trailing <think> if add_generation_prompt
-                "I'm\nthinking</think>\n"
-                "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function name=\"special_function\">\n"
-                "{\"arg1\": 1}\n"
-                "</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tool>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<tools>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tools>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<response>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "<response>\n"
-                "    {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</response>\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```xml\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "```",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "```json\n"
-                "\n"
-                "                    <function_call> {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}} \n"
-                "                    </function_call> \n"
-                "``` ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<json>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</json>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<xml>\n"
-                "  {\n"
-                "    \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}\n"
-                "  }\n"
-                "</xml>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<JSON>\n"
-                "  {\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</JSON>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\n  \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        // Test multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "";
-        message_assist_multiple_calls.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"python", "{\"code\":\"print('hello')\"}", ""});
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<tool_call>\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "{\"name\": \"python\", \"arguments\": {\"code\":\"print('hello')\"}}\n"
-                "</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}</function>\n"
-                "<function=python>{\"code\":\"print('hello')\"}</function>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "This is not a tool call:",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "This is not a tool call:\n"
-                "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_HERMES_2_PRO}));
-        // assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-        //     test_chat_parse(
-        //         "I'm\nthinking</think>Hello, world!\nWhat's up?",
-        //         COMMON_CHAT_FORMAT_HERMES_2_PRO));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ true,
-                    /* .thinking_forced_open = */ false,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>");
-
-        // Test multiple tool calls with template
-        common_chat_msg message_assist_multiple_calls_template;
-        message_assist_multiple_calls_template.role = "assistant";
-        message_assist_multiple_calls_template.content = "";
-        message_assist_multiple_calls_template.tool_calls.push_back({"special_function", "{\"arg1\": 1}", ""});
-        message_assist_multiple_calls_template.tool_calls.push_back({"python", "{\"code\":\"print('test')\"}", ""});
-
-        test_templates(tmpls.get(), end_tokens, message_assist_multiple_calls_template, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n"
-                      "</tool_call>\n"
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"print('test')\"}}\n"
-                      "</tool_call>");
-
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python_lines, tools,
-                      "<tool_call>\n"
-                      "{\"name\": \"python\", \"arguments\": {\"code\":\"# This is a program:\\nprint('hey')\"}}\n"
-                      "</tool_call>");
-        assert_msg_equals(
-            simple_assist_msg("", /* reasoning_content= */ "<tool_call>nah uhg</tool_call>"),
-            test_chat_parse(
-                "<think><tool_call>nah uhg</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
+        // Qwen3-Coder (tool calling with XML-style format)
+        auto tst = peg_tester("models/templates/Qwen3-Coder.jinja", detailed_debug);
+
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+
+        tst.test(
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n"
+               "1\n"
+               "</parameter>\n"
+               "<parameter=arg2>\n"
+               "2\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test with code content (multiline)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "def hello():\n"
+               "    print(\"Hello, world!\")\n"
+               "\n"
+               "hello()\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test with code content (asian unicode chars)
+        tst.test(
+               "<tool_call>\n"
+               "<function=python>\n"
+               "<parameter=code>\n"
+               "格\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                python_tool
+        })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"格\"}", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test with HTML tag content
+        tst.test(
+               "<tool_call>\n"
+               "<function=html>\n"
+               "<parameter=markup>\n"
+               "<html>\n"
+               " <head>\n"
+               "  <title>Hello!</title>\n"
+               " </head>\n"
+               "</html>\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                html_tool
+        })
+            .expect_tool_calls({
+                { "html", "{\"markup\": \"<html>\\n <head>\\n  <title>Hello!</title>\\n </head>\\n</html>\"}", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test with TODO list (array of objects)
+        tst.test(
+               "<tool_call>\n"
+               "<function=todo_list>\n"
+               "<parameter=todos>\n"
+               "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({
+                todo_list
+        })
+            .expect_tool_calls({
+                { "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test flexible optional argument ordering (2 required + 4 optional, reversed optional order)
+        tst.test(
+               "<tool_call>\n"
+               "<function=tool_2req_4opt>\n"
+               "<parameter=req1>\nhello\n</parameter>\n"
+               "<parameter=req2>\n42\n</parameter>\n"
+               "<parameter=opt4>\n100\n</parameter>\n"
+               "<parameter=opt2>\n200\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ tool_2req_4opt })
+            .expect_tool_calls({
+                { "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test flexible optional argument ordering (2 required + 5 optional, reversed optional order)
+        tst.test(
+               "<tool_call>\n"
+               "<function=tool_2req_5opt>\n"
+               "<parameter=req1>\nworld\n</parameter>\n"
+               "<parameter=req2>\n7\n</parameter>\n"
+               "<parameter=opt5>\nlast\n</parameter>\n"
+               "<parameter=opt3>\nmiddle\n</parameter>\n"
+               "<parameter=opt1>\nfirst\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ tool_2req_5opt })
+            .expect_tool_calls({
+                { "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order)
+        tst.test(
+               "<tool_call>\n"
+               "<function=tool_2req_5opt>\n"
+               "<parameter=req1>\ntest\n</parameter>\n"
+               "<parameter=req2>\n99\n</parameter>\n"
+               "<parameter=opt3>\nc\n</parameter>\n"
+               "<parameter=opt1>\na\n</parameter>\n"
+               "<parameter=opt5>\ne\n</parameter>\n"
+               "<parameter=opt4>\n4\n</parameter>\n"
+               "<parameter=opt2>\n2\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ tool_2req_5opt })
+            .expect_tool_calls({
+                { "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // nullable string type ["string", "null"]
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_str>\n"
+               "<parameter=name>\nhello world\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_string_tool })
+            .expect_tool_calls({
+                { "set_nullable_str", R"({"name": "hello world"})", {} },
+            })
+            .run();
+
+        // nullable string with null first in type array ["null", "string"]
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_str_nf>\n"
+               "<parameter=name>\nhello world\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_string_null_first_tool })
+            .expect_tool_calls({
+                { "set_nullable_str_nf", R"({"name": "hello world"})", {} },
+            })
+            .run();
+
+        // nullable integer type ["integer", "null"] - should use JSON value path, not string
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_nullable_int>\n"
+               "<parameter=count>\n42\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ nullable_int_tool })
+            .expect_tool_calls({
+                { "set_nullable_int", R"({"count": 42})", {} },
+            })
+            .run();
+
+        // enum without explicit type key - should infer string from enum values
+        tst.test(
+               "<tool_call>\n"
+               "<function=set_unit>\n"
+               "<parameter=unit>\ncelsius\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .tools({ enum_no_type_tool })
+            .expect_tool_calls({
+                { "set_unit", R"({"unit": "celsius"})", {} },
+            })
+            .run();
     }
     {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(tmpls.get(), inputs_tools_builtin).format);
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
-                      common_chat_templates_apply(
-                          read_templates("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja").get(),
-                          inputs_tools_builtin)
-                          .format);
-
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LLAMA_3_X}));
-
-        // test_templates(tmpls.get(), end_tokens, message_assist, tools, R"(?)", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call_code_interpreter, llama_3_1_tools,
-                      "<|python_tag|>code_interpreter.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call_python, tools,
-                      "<|python_tag|>python.call(code=\"print('hey')\")");
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"XYZCITY\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ get_time_tool })
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls_and_reasoning("get_time", "{\"city\":\"Tokyo\"}", "REASONING"))
+            .run();
+    }
 
-        assert_equals(COMMON_CHAT_FORMAT_LLAMA_3_X, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test(
+               "REASONING</think>CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": "
+               "\"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({
+                get_time_tool, get_weather_tool
+        })
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .parallel_tool_calls(true)
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "REASONING", "CONTENT",
+                { { "get_time", "{\"city\":\"Paris\"}" }, { "get_weather", "{\"city\":\"Paris\"}" } }))
+            .run();
+    }
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "{\"name\": \"special_function\", \"parameters\": {\"arg1\": 1}}");
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("REASONING</think>\nCONTENT")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(simple_assist_msg("CONTENT", "REASONING\n"))
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                      common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
-            common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY,
-                        common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        for (auto is_partial : { false, true }) {
-            assert_equals(
-                message_assist_call,
-                test_chat_parse(
-                    "<function=special_function>{\"arg1\": 1}</function>",
-                    is_partial,
-                    {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
-        }
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
+        tst.test("CONTENT").enable_thinking(false).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
+            expect(simple_assist_msg("CONTENT", "")).run();
+    }
 
-        assert_equals(
-            message_assist_call,
-            test_chat_parse(
-                "<function=special_function>{\"arg1\": 1}<",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
+    // DeepSeek V3.2 tests - format uses DSML markup:
+    //   <｜DSML｜function_calls>
+    //   <｜DSML｜invoke name="foo">
+    //   <｜DSML｜parameter name="bar" string="true|false">value</｜DSML｜parameter>
+    //   </｜DSML｜invoke>
+    //   </｜DSML｜function_calls>
+    // Reasoning uses <think>...</think>. The generation prompt ends in <think> (thinking mode)
+    // or <think></think> (non-thinking mode).
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.2.jinja", detailed_debug);
+
+        // Pure content (non-thinking mode)
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist)
+            .run();
+
+        // Thinking + content
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Thinking + tool call (single, string param)
+        tst.test(
+               "Let me check the time</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"get_time\">\n"
+               "<｜DSML｜parameter name=\"city\" string=\"true\">Tokyo</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls_and_reasoning("get_time", R"({"city": "Tokyo"})", "Let me check the time"))
+            .run();
+
+        // Tool call without reasoning (non-thinking mode), integer param (string="false")
+        tst.test(
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"special_function\">\n"
+               "<｜DSML｜parameter name=\"arg1\" string=\"false\">1</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Multiple parallel tool calls with reasoning
+        tst.test(
+               "Calling both</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"get_time\">\n"
+               "<｜DSML｜parameter name=\"city\" string=\"true\">Paris</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "<｜DSML｜invoke name=\"get_weather\">\n"
+               "<｜DSML｜parameter name=\"city\" string=\"true\">Paris</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .parallel_tool_calls(true)
+            .tools({ get_time_tool, get_weather_tool })
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "Calling both", "",
+                { { "get_time", R"({"city": "Paris"})" }, { "get_weather", R"({"city": "Paris"})" } }))
+            .run();
+
+        // Tool call with content before tool calls
+        tst.test(
+               "Thinking about it</think>"
+               "Let me call the function.\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"special_function\">\n"
+               "<｜DSML｜parameter name=\"arg1\" string=\"false\">1</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect_reasoning("Thinking about it")
+            .expect_content("Let me call the function.")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+            })
+            .run();
+
+        // Tool call with negative number
+        tst.test(
+               "Test negative</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"magic_int\">\n"
+               "<｜DSML｜parameter name=\"ref\" string=\"false\">-14</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Test negative")
+            .expect_tool_calls({
+                { "magic_int", R"({"ref": -14})", {} },
+            })
+            .run();
+
+        // Tool call with decimal number
+        tst.test(
+               "Test decimal</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"amount\">\n"
+               "<｜DSML｜parameter name=\"orig\" string=\"false\">3.14</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ amount_tool })
+            .expect_reasoning("Test decimal")
+            .expect_tool_calls({
+                { "amount", R"({"orig": 3.14})", {} },
+            })
+            .run();
+
+        // Tool call with boolean
+        tst.test(
+               "Test boolean</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"toggle\">\n"
+               "<｜DSML｜parameter name=\"enabled\" string=\"false\">true</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ toggle_tool })
+            .expect_reasoning("Test boolean")
+            .expect_tool_calls({
+                { "toggle", R"({"enabled": true})", {} },
+            })
+            .run();
+
+        // Tool call with array parameter (JSON-formatted)
+        tst.test(
+               "Test array</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"todo_list\">\n"
+               "<｜DSML｜parameter name=\"todos\" string=\"false\">[\"buy milk\",\"walk dog\"]</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ todo_list })
+            .expect_reasoning("Test array")
+            .expect_tool_calls({
+                { "todo_list", R"({"todos": ["buy milk", "walk dog"]})", {} },
+            })
+            .run();
+
+        // Tool call with object parameter (JSON-formatted)
+        tst.test(
+               "Test object</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"set_config\">\n"
+               "<｜DSML｜parameter name=\"config\" string=\"false\">{\"theme\":\"dark\",\"level\":2}</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ config_tool })
+            .expect_reasoning("Test object")
+            .expect_tool_calls({
+                { "set_config", R"({"config": {"theme": "dark", "level": 2}})", {} },
+            })
+            .run();
+
+        // Edge case: empty reasoning
+        tst.test(
+               "</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"get_time\">\n"
+               "<｜DSML｜parameter name=\"city\" string=\"true\">XYZCITY</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", R"({"city": "XYZCITY"})"))
+            .run();
+
+        // Edge case: tool call with multiple params (mixed types, string first)
+        tst.test(
+               "Multi-arg call</think>\n\n"
+               "<｜DSML｜function_calls>\n"
+               "<｜DSML｜invoke name=\"magic_int\">\n"
+               "<｜DSML｜parameter name=\"ref\" string=\"false\">42</｜DSML｜parameter>\n"
+               "<｜DSML｜parameter name=\"name\" string=\"true\">foo bar</｜DSML｜parameter>\n"
+               "</｜DSML｜invoke>\n"
+               "</｜DSML｜function_calls>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Multi-arg call")
+            .expect_tool_calls({
+                { "magic_int", R"({"ref": 42, "name": "foo bar"})", {} },
+            })
+            .run();
+    }
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<function=special_function>{\"arg1\": 1}</function>");
+    // GLM-4.6 tests - format: <tool_call>function_name\n<arg_key>...</arg_key>\n<arg_value>...</arg_value>\n</tool_call>
+    {
+        auto tst = peg_tester("models/templates/GLM-4.6.jinja", detailed_debug);
+        tst.test(
+               "<tool_call>special_function\n"
+               "<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n"
+               "</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
     }
+
+    // GLM-4.7-Flash tests - format: <tool_call>function_name<arg_key>...</arg_key><arg_value>...</arg_value></tool_call>
+    // Note: Template uses forced-open thinking mode (prompt ends with <think>)
     {
-        auto tmpls = read_templates("models/templates/meetkai-functionary-medium-v3.2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eom_id|>", "<|eot_id|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "Hello, world!\nnono\nWhat's up?",
-                "",
-                "special_function",
-                "{\"arg1\": 1}"),
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\n"
-                "nono\n"
-                "What's up?>>>special_function\n"
-                "{\"arg1\": 1}\n",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call_python_lines_unclosed,
-            test_chat_parse(
-                "python\n"
-                "# This is a program:\n"
-                "print('hey')",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "special_function\n"
-                "{\"arg1\": 1} \n                    ",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "all\n"
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2}));
-
-        test_templates(tmpls.get(), end_tokens, message_assist, {},
-                      "all\n"
-                      "Hello, world!\n"
-                      "What's up?",
-                      /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "special_function\n"
-                      "{\"arg1\": 1}");
+        auto tst = peg_tester("models/templates/GLM-4.7-Flash.jinja", detailed_debug);
+
+        // Pure content (no reasoning)
+        tst.test("Hello, world!\nWhat's up?")
+            .enable_thinking(false)
+            .expect(message_assist)
+            .expect_reconstruction()
+            .run();
+
+        // Reasoning with content (forced-open mode - input starts after <think>)
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .expect_reconstruction()
+            .run();
+
+        // Tool call without reasoning
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+
+        // Tool call with reasoning (forced-open mode)
+        tst.test(
+               "I'm\nthinking</think>"
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .expect_reconstruction()
+            .run();
+
+        tst.test(
+               "<tool_call>special_function"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "</tool_call>"
+               "<tool_call>special_function_with_opt"
+               "<arg_key>arg1</arg_key><arg_value>1</arg_value>"
+               "<arg_key>arg2</arg_key><arg_value>2</arg_value>"
+               "</tool_call>")
+            .enable_thinking(false)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .expect_reconstruction()
+            .run();
+
+        // #20650: tool with no required args, model emits <tool_call>name</tool_call> with no arg tags.
+        {
+            static common_chat_tool no_args_tool{
+                "read_file_diff_md", "Reads a file diff",
+                R"({"type":"object","properties":{"review_id":{"type":"string"},"file_id":{"type":"string"}}})",
+            };
+            tst.test(
+                   "Let me read the diff content."
+                   "</think>"
+                   "<tool_call>read_file_diff_md</tool_call>")
+                .enable_thinking(true)
+                .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+                .tools({ no_args_tool })
+                .expect_reasoning("Let me read the diff content.")
+                .expect_tool_calls({{ "read_file_diff_md", "{}", {} }})
+                .expect_reconstruction()
+                .run();
+        }
     }
+
+    // Verify the throw path produces a readable error message, not std::out_of_range.
+    // #20424 introduced effective_input = generation_prompt + input, but the throw
+    // uses input.substr(result.end) where result.end is in effective_input space.
     {
-        auto tmpls = read_templates("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja");
-        std::vector<std::string>   end_tokens{ "<|eot_id|>" };
+        if (!g_template_filter.empty() && std::string("models/templates/GLM-4.7-Flash.jinja").find(g_template_filter) != std::string::npos) {
+            auto tmpls = common_chat_templates_ptr(
+                common_chat_templates_init(nullptr, read_file("models/templates/GLM-4.7-Flash.jinja")));
 
-        assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_FIREFUNCTION_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+            static common_chat_tool weather_tool{
+                "get_weather", "Get weather",
+                R"({"type":"object","properties":{"city":{"type":"string"}},"required":["city"]})",
+            };
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      " functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]");
-    }
-    {
-        // Original DeepSeek R1 template. Leaves <｜tool▁calls▁begin｜> and others unclosed. Our logic fixes the prompt.
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
+            common_chat_templates_inputs inputs;
+            inputs.tools = { weather_tool };
+            inputs.enable_thinking = true;
+            inputs.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
+            inputs.add_generation_prompt = true;
+            inputs.use_jinja = true;
+            common_chat_msg msg;
+            msg.role = "user";
+            msg.content = "get_weather";
+            inputs.messages = { msg };
 
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
             auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1, params.format);
-            assert_equals(true, params.thinking_forced_open);
+            common_peg_arena arena;
+            arena.load(params.parser);
+            common_chat_parser_params pp(params);
+
+            // generation_prompt is non-empty for thinking models, so result.end
+            // will be offset by generation_prompt.size() into effective_input space.
+            assert(!pp.generation_prompt.empty());
+
+            std::string bad_input =
+                "Thinking.\n"
+                "</think>"
+                "<tool_call>get_weather"
+                "<arg_key>city</arg_key><arg_value>Tokyo</arg_value>"
+                "</tool_call>\n";
+
+            bool got_runtime_error = false;
+            bool got_out_of_range = false;
+            std::string error_msg;
+            try {
+                common_chat_peg_parse(arena, bad_input, /*is_partial=*/false, pp);
+            } catch (const std::out_of_range & e) {
+                got_out_of_range = true;
+                error_msg = e.what();
+            } catch (const std::runtime_error & e) {
+                got_runtime_error = true;
+                error_msg = e.what();
+            }
+            GGML_ASSERT(!got_out_of_range && "throw path crashed with out_of_range (input.substr in effective_input space)");
+            GGML_ASSERT(got_runtime_error  && "throw path should produce std::runtime_error with parse position");
         }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "<think>I'm\nthinking"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with"),
-            test_chat_parse(
-                "I need to remember the correct syntax. It starts with <｜tool▁calls▁begin｜> and ends with",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unopened_unparsed,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            // Latest template update (ast of 20250209) adds a trailing <think>\n if add_generation_prompt is true.
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-        //               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-        //               "```json\n"
-        //               "{\"arg1\": 1}\n"
-        //               // Look what's not here: <｜tool▁calls▁end｜> (also missing the <｜end▁of▁sentence｜>, but that is removed lazily by the test's delta logic)
-        //               "```<｜tool▁call▁end｜>",
-        //               /* expect_grammar_triggered= */ true,
-        //               /* test_grammar_if_triggered= */ false);
     }
+
+    // Kimi-K2-Thinking tests - custom parser
+    // Unique feature: tool call ID embeds function name as functions.<name>:<counter>
     {
-        // Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
-        auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_R1,                   common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
+        auto tst = peg_tester("models/templates/Kimi-K2-Thinking.jinja", detailed_debug);
+
+        // Basic content only
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Single tool call
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Single tool call with reasoning
+        tst.test(
+               "<think>I'm thinking about this</think>"
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "I'm thinking about this", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Tool call with content
+        tst.test(
+               "Hello, world!\nWhat's up?"
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("Hello, world!\nWhat's up?", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0"))
+            .run();
+
+        // Multiple tool calls (parallel) - tests the indexing behavior
+        tst.test(
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.special_function_with_opt:1<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", "functions.special_function_with_opt:1" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning
+        tst.test(
+               "<think>I need to call two functions</think>"
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Python tool with multiline code
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.python:0<|tool_call_argument_begin|>"
+               "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ python_tool })
+            .expect_tool_calls({
+                { "python", "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}", "functions.python:0" },
+            })
+            .run();
+
+        // Tool call with empty arguments
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.empty_args:0<|tool_call_argument_begin|>"
+               "{}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}", "functions.empty_args:0"))
+            .run();
+
+        // Partial tool call (streaming)
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": ")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": ", "functions.special_function:0"))
+            .run();
+
+        // Three tool calls to verify counter continues incrementing
+        tst.test(
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print(1)\"}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.html:2<|tool_call_argument_begin|>{\"markup\": \"<p>test</p>\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool, html_tool
+        })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print(1)\"}", "functions.python:1" },
+                { "html", "{\"markup\": \"<p>test</p>\"}", "functions.html:2" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning, call *inside thinking block*
+        tst.test(
+               "<think>I need to call two functions"
+               "<|tool_calls_section_begin|>"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|>"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}<|tool_call_end|>"
+               "<|tool_calls_section_end|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Multiple tool calls with reasoning, call *inside thinking block* and *without section markers or end markers
+        tst.test(
+               "<think>I need to call two functions"
+               "<|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}"
+               "<|tool_call_begin|>functions.python:1<|tool_call_argument_begin|>{\"code\": \"print('hey')\"}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, python_tool
+        })
+            .expect_reasoning("I need to call two functions")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "functions.special_function:0" },
+                { "python", "{\"code\": \"print('hey')\"}", "functions.python:1" },
+            })
+            .run();
+
+        // Real life test - execute_command
+        tst.test("<|tool_call_begin|>functions.execute_command:0<|tool_call_argument_begin|>{\"command\": \"ls -lah\""
+            ", \"cwd\": \"/home/jarvis/development/exllamav3\", \"timeout\": 10}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .parallel_tool_calls(true)
+            .tools({
                 {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-
-        assert_msg_equals(message_assist_call_thoughts_unparsed,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<｜tool▁calls｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_DEEPSEEK_R1}));
-
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>\n\n"
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
+                    /* .name = */ "execute_command",
+                    /* .description = */ "Execute shell command",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "command": {
+                                "type": "string",
+                                "description": "Shell command to execute"
+                            },
+                            "cwd": {
+                                "type": "string",
+                                "description": "Working directory"
+                            },
+                            "timeout": {
+                                "type": "integer",
+                                "description": "The timeout in seconds"
+                            }
+                        },
+                        "required": ["command"]
+                    })"
+                }
+            }).
+            expect_tool_calls({
                 {
-                    /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
-                "```json\n"
-                "{\"arg1\": 1}\n"
-                "```<｜tool▁call▁end｜><｜tool▁calls▁end｜>");
+                    "execute_command",
+                    R"({"command": "ls -lah", "cwd": "/home/jarvis/development/exllamav3", "timeout": 10})",
+                    "functions.execute_command:0"
+                }
+            })
+            .run();
     }
+
     {
-        auto tmpls = read_templates("models/templates/ibm-granite-granite-3.3-2B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|end_of_text|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-
-        assert_equals(COMMON_CHAT_FORMAT_GRANITE, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(message_assist_empty,
-            test_chat_parse(
-                "<think",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(message_assist_thoughts_no_content,
-            test_chat_parse(
-                "<think>I'm\nthinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-        assert_msg_equals(
-            message_assist_empty,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><response",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_empty_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\"",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_GRANITE}));
-        assert_msg_equals(
-            message_assist_call_cutoff_args,
-            test_chat_parse(
-                "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(
-            message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-    // TODO @ngxson : generic tool call should be removed in the future
-#if 0
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call_id, tools,
-                      "{\n"
-                      "  \"tool_calls\": [\n"
-                      "    {\n"
-                      "      \"name\": \"special_function\",\n"
-                      "      \"arguments\": {\n"
-                      "        \"arg1\": 1\n"
-                      "      },\n"
-                      "      \"id\": \"123456789\"\n"
-                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
-                      "}",
-                      /* expect_grammar_triggered= */ false
-        );
-#endif
+        auto kimi_id_special_func_tool_call =
+            simple_assist_msg("", "", "special_function", "{\"arg1\": 1}", "functions.special_function:0");
+
+        // Kimi-K2 old template
+        auto tst = peg_tester("models/templates/moonshotai-Kimi-K2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(kimi_id_special_func_tool_call)
+            .expect_reconstruction()
+            .run();
+
+        // Kimi-K2-Instruct
+        auto tst2 = peg_tester("models/templates/Kimi-K2-Instruct.jinja", detailed_debug);
+        tst2.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst2.test(
+               "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>"
+               "{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>")
+            .tools({ special_function_tool })
+            .expect(kimi_id_special_func_tool_call)
+            .expect_reconstruction()
+            .run();
     }
-    {
-        auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
-        std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
 
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthink",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-
-        // Test parse_tool_calls == false
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-        assert_msg_equals(
-            simple_assist_msg("", "I'm\nthinking"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ false,
-                }));
-
-        // Test reasoning formats
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                }));
-
-        assert_msg_equals(
-            simple_assist_msg(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                    /* .reasoning_in_content = */ true,
-                }));
-
-        // Test tool calling in role header
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                " to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
-        assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
-            test_chat_parse(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
-                "<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
-                }));
+    // LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|>
+    {
+        auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug);
+
+        // Basic content only
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Single tool call without reasoning
+        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with string argument
+        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
+
+        // Tool call with reasoning (enable_thinking=true)
+        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Multiple tool calls (parallel)
+        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+            })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        // Tool call with reasoning and content
+        tst.test("<think>I need to call a function</think>"
+                 "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ get_time_tool })
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "I need to call a function", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+            ))
+            .run();
+
+        // Python tool with multiline code in string
+        tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
+            .tools({ python_tool })
+            .expect_tool_calls({
+                { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
+            })
+            .run();
+
+        // Partial tool call (streaming)
+        tst.test("<|tool_call_start|>[special_function(arg1=")
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
+            .run();
+
+        // Tool call with empty arguments
+        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}"))
+            .run();
+
+        // fake tool call marker in reasoning
+        tst.test(
+               "<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
+               "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+            })
+            .run();
     }
+
+    // LFM2.5 tests - uses plain "List of tools: [...]" and bare [name(args)] without wrapper tokens
     {
-        // Seed-OSS format tests
-        auto tmpls = read_templates("models/templates/ByteDance-Seed-OSS.jinja");
-        std::vector<std::string> end_tokens{ "<seed:eos>" };
+        auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
+
+        // Basic content only
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Single tool call without reasoning
+        tst.test("[special_function(arg1=1)]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with string argument
+        tst.test("[get_time(city=\"XYZCITY\")]")
+            .tools({ get_time_tool })
+            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+            .run();
+
+        // Tool call with reasoning (enable_thinking=true)
+        tst.test("<think>I'm\nthinking</think>[special_function(arg1=1)]")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Multiple tool calls (parallel)
+        tst.test("[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+            })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", {} },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+            })
+            .run();
+
+        // Tool call with content before tool call
+        tst.test("Let me check the time.[get_time(city=\"Paris\")]")
+            .tools({ get_time_tool })
+            .expect(message_with_reasoning_content_and_multiple_tool_calls(
+                "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+            ))
+            .run();
+
+        // Partial tool call (streaming)
+        tst.test("[special_function(arg1=")
+            .tools({ special_function_tool })
+            .is_partial(true)
+            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
+            .run();
+
+        // Tool call with empty arguments
+        tst.test("[empty_args()]")
+            .tools({ empty_args_tool })
+            .expect(simple_assist_msg("", "", "empty_args", "{}"))
+            .run();
+    }
 
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_SEED_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
+    // Apertus-8B-Instruct tests - FUNC_NAME_AS_KEY format
+    // Format: <|tools_prefix|>[{"function_name": {...arguments...}}]<|tools_suffix|>
+    {
+        auto tst = peg_tester("models/templates/Apertus-8B-Instruct.jinja", detailed_debug);
+        tst.test("<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+    }
 
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
+    // MiniMax-M2 tests - XML invoke format with parameter tags
+    // Format: <minimax:tool_call><invoke name="func"><parameter name="key">value</parameter></invoke></minimax:tool_call>
+    {
+        auto tst = peg_tester("models/templates/MiniMax-M2.jinja", detailed_debug);
+        tst.test("</think>Hello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist).run();
+
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?").enable_thinking(true).reasoning_format(COMMON_REASONING_FORMAT_AUTO).expect(message_assist_thoughts).run();
+
+        tst.test("Let's call a tool:</think><minimax:tool_call>\n<invoke name=\"empty_args\">\n</invoke>\n</minimax:tool_call>").
+            enable_thinking(true).
+            reasoning_format(COMMON_REASONING_FORMAT_AUTO).
+            tools({ empty_args_tool }).
+            expect(message_with_reasoning_and_tool_call("Let's call a tool:", "empty_args", "{}")).
+            run();
+
+        tst.test(
+               "</think><minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter "
+               "name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .run();
+    }
 
-        // Test simple reasoning content
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!", "I'm thinking about the answer"),
-            test_chat_parse(
-                "<seed:think>I'm thinking about the answer</seed:think>Hello, world!",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test budget reflection tags
-        common_chat_msg msg_budget_reflect;
-        msg_budget_reflect.role = "assistant";
-        msg_budget_reflect.content = "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>I need to calculate this step by step.";
-        msg_budget_reflect.reasoning_content = "Token usage: 45/1000\nI should continue thinking to find the best solution.";
-        assert_msg_equals(
-            msg_budget_reflect,
-            test_chat_parse(
-                "<seed:think>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:think>"
-                "<seed:cot_budget_reflect>Token usage: 45/1000\nI should continue thinking to find the best solution.</seed:cot_budget_reflect>"
-                "I need to calculate this step by step.",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test tool calls with Seed-OSS format
-        common_chat_msg msg_tool_call;
-        msg_tool_call.role = "assistant";
-        msg_tool_call.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_tool_call,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test reasoning + tool call combination
-        common_chat_msg msg_reasoning_tool;
-        msg_reasoning_tool.role = "assistant";
-        msg_reasoning_tool.content = "";
-        msg_reasoning_tool.reasoning_content = "I need to calculate the sum of these numbers";
-        msg_reasoning_tool.tool_calls.push_back({"calculate_sum", "{\"numbers\": [1, 2, 3]}", ""});
-        assert_msg_equals(
-            msg_reasoning_tool,
-            test_chat_parse(
-                "<seed:think>I need to calculate the sum of these numbers</seed:think>"
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1, 2, 3]</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test deltas: the number of tool calls in partial parses should never decrease
-        std::string tool_msg = "<seed:tool_call>\n"
-            "<function=fun>\n"
-            "<parameter=smth>[1, 2, 3]</parameter>\n"
-            "</function>";
-        std::size_t previousToolCalls = 0;
-        for (std::size_t i = std::string("<seed:tool_call>").length(); i < tool_msg.length() - 1; i++) {
-            auto partial = tool_msg.substr(0, i);
-            auto partial_res = test_chat_parse(partial, true, { COMMON_CHAT_FORMAT_SEED_OSS, COMMON_REASONING_FORMAT_DEEPSEEK });
-            if (partial_res.tool_calls.size() < previousToolCalls) {
-                throw std::runtime_error("Tool call size decreased on partial: " + partial + " from " + std::to_string(previousToolCalls) + " to " + std::to_string(partial_res.tool_calls.size()));
-            }
-            previousToolCalls = partial_res.tool_calls.size();
-        }
+    // NVIDIA-Nemotron-Nano-v2 tests - <TOOLCALL>...</TOOLCALL> format
+    // Format: <TOOLCALL>[{"name": "func", "arguments": {...}}]</TOOLCALL>
+    {
+        auto tst = peg_tester("models/templates/NVIDIA-Nemotron-Nano-v2.jinja", detailed_debug);
+        tst.test("<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL><SPECIAL_12>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
 
-        // Test multiple parameters in tool call
-        common_chat_msg msg_multi_param;
-        msg_multi_param.role = "assistant";
-        msg_multi_param.tool_calls.push_back({"process_data", "{\"input\": \"test\", \"format\": \"json\"}", ""});
-        assert_msg_equals(
-            msg_multi_param,
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=process_data>\n"
-                "<parameter=input>test</parameter>\n"
-                "<parameter=format>json</parameter>\n"
-                "</function>\n"
-                "</seed:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test partial parsing for incomplete tool call - don't actually add the call until parsing parameters is done
-        assert_msg_equals(
-            simple_assist_msg("", "", "calculate_sum", "{\"numbers\":"),
-            test_chat_parse(
-                "<seed:tool_call>\n"
-                "<function=calculate_sum>\n"
-                "<parameter=numbers>[1,\n",
-                /* is_partial= */ true,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
-
-        // Test incomplete reasoning tag
-        assert_msg_equals(
-            simple_assist_msg("", "I was thinking"),
-            test_chat_parse(
-                "<seed:think>I was thinking",
-                /* is_partial= */ true,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_SEED_OSS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test content without reasoning
-        assert_msg_equals(
-            simple_assist_msg("This is a simple response without reasoning."),
-            test_chat_parse(
-                "This is a simple response without reasoning.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_SEED_OSS}));
+    // CohereForAI-c4ai-command-r7b (uses START_RESPONSE/END_RESPONSE, START_THINKING/END_THINKING, START_ACTION/END_ACTION)
+    {
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja", detailed_debug);
+        tst.test("<|START_RESPONSE|>Hello, world!\nWhat's up?<|END_RESPONSE|>").expect(message_assist).run();
+        tst.test(
+               "<|START_THINKING|>I'm\nthinking<|END_THINKING|>"
+               "<|START_ACTION|>[\n"
+               "    {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
+               "]<|END_ACTION|>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect(message_assist_thoughts_call_idx)
+            .run();
     }
+    // CohereForAI-c4ai-command-r-plus (uses markdown code block format)
     {
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-Nano-v2.jinja");
-        std::vector<std::string> end_tokens{ "<SPECIAL_12>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_NEMOTRON_V2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_NEMOTRON_V2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_NEMOTRON_V2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?\n",
-                      /* expect_grammar_triggered= */ false);
+        auto tst = peg_tester("models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja", detailed_debug);
+        tst.test("<|CHATBOT_TOKEN|>Hello, world!\nWhat's up?<|END_OF_TURN_TOKEN|>").expect(message_assist).run();
+        // Tool calls: Action: followed by JSON code block
+        tst.test(
+               "Action:\n"
+               "```json\n"
+               "[{\"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}]\n"
+               "```")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
 
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<TOOLCALL>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</TOOLCALL>",
-                      /* expect_grammar_triggered= */ true
-        );
+    // mistralai-Mistral-Nemo-Instruct-2407.jinja
+    {
+        auto tst = peg_tester("models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("[TOOL_CALLS][{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, \"id\": \"123456789\"}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .expect_reconstruction()
+            .run();
     }
     {
-        auto tmpls = read_templates("models/templates/deepseek-ai-DeepSeek-V3.1.jinja");
-        std::vector<std::string>   end_tokens{ "<｜end▁of▁sentence｜>" };
-
-        for (const auto & inputs : { inputs_no_tools, inputs_tools }) {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, params.format);
-            assert_equals(true, params.thinking_forced_open);
-        }
-
-        test_templates(tmpls.get(), end_tokens, message_assist, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        test_templates(tmpls.get(), end_tokens, message_assist_thoughts, tools, "</think>Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
-        assert_msg_equals(
-            simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
-            test_chat_parse(
-                "I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                }));
-        // variant: thinking forced open, reasoning_format none
-        assert_msg_equals(
-            simple_assist_msg("REASONING</think>ok", ""),
-            test_chat_parse(
-                "REASONING</think>ok",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: happy path for when it works as the model card says it should
-        assert_msg_equals(
-            simple_assist_msg("", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + thinking open
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: simple + multiple tool calls
-        common_chat_msg message_assist_multiple_calls;
-        message_assist_multiple_calls.role = "assistant";
-        message_assist_multiple_calls.content = "CONTENT";
-        message_assist_multiple_calls.tool_calls.push_back({"get_time", "{\"city\":\"Paris\"}", ""});
-        message_assist_multiple_calls.tool_calls.push_back({"get_weather", "{\"city\":\"Paris\"}", ""});
-        assert_msg_equals(
-            message_assist_multiple_calls,
-            test_chat_parse(
-                "CONTENT<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁call▁begin｜>get_weather<｜tool▁sep｜>{\"city\": \"Paris\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time2<｜tool▁sep｜>{\"city\": \"Tokyo2\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>REASONING</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + not partial
-        //          This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
-        //          to make tool calls in reasoning content according to the model card, but it does sometimes, so
-        //          add the reasoning content as regular content and parse the tool calls.
-        assert_msg_equals(
-            simple_assist_msg("REASONING", "", "get_time", "{\"city\":\"Tokyo\"}"),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking forced open + tool call in reasoning content + no closing think + partial
-        assert_msg_equals(
-            simple_assist_msg("", "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>", "", ""),
-            test_chat_parse(
-                "REASONING<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>get_time<｜tool▁sep｜>{\"city\": \"Tokyo\"}<｜tool▁call▁end｜><｜tool▁calls▁end｜>",
-                /* is_partial= */ true,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ true,
-                    /* .parse_tool_calls = */ true,
-                }));
-        // variant: thinking not forced open + missing reasoning + no tool calls
-        assert_msg_equals(
-            simple_assist_msg("CONTENT", ""),
-            test_chat_parse(
-                "CONTENT",
-                /* is_partial= */ false,
-                {
-                    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                    /* .reasoning_in_content = */ false,
-                    /* .thinking_forced_open = */ false,
-                    /* .parse_tool_calls = */ true,
-                }));
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.1.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("<function=special_function>{\"arg1\": 1}</function>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
     }
+    // Functionary v3.2 - recipient-based format: >>>recipient\n{content}
     {
-        auto tmpls = read_templates("models/templates/Apertus-8B-Instruct.jinja");
-        std::vector<std::string> end_tokens{ "<|assistant_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_APERTUS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_APERTUS}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<|inner_prefix|>I'm\nthinking<|inner_suffix|><|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_APERTUS,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
+        auto tst = peg_tester("models/templates/meetkai-functionary-medium-v3.2.jinja", detailed_debug);
+        tst.test("all\nHello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("special_function\n{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+    }
 
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<|tools_prefix|>[{\"special_function\": {\"arg1\": 1}}]<|tools_suffix|>",
-                      /* expect_grammar_triggered= */ true
-        );
+    // FireFunction
+    {
+        auto tst = peg_tester("models/templates/fireworks-ai-llama-3-firefunction-v2.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test(" functools[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+    }
 
-        // TODO @ngxson : not sure why this fails, but not very important for now
-        // assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
+    // DeepSeek R1 Distill Llama 8B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
+    {
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja", detailed_debug);
+        tst.test("</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist)
+            .run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+    }
+    // llama-cpp DeepSeek R1 template (always forced-open thinking)
+    {
+        auto tst = peg_tester("models/templates/llama-cpp-deepseek-r1.jinja", detailed_debug);
+        tst.test("</think>Hello, world!\nWhat's up?").expect(message_assist).reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "</think><｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .parallel_tool_calls(true)
+            .expect(message_assist_call)
+            .run();
     }
+    // DeepSeek R1 Distill Qwen 32B - reasoning tests only (forced open thinking)
+    // Note: Template uses forced-open mode (prompt ends with <think>), so input shouldn't include opening tag
     {
-        // LFM2 format tests
-        auto tmpls = read_templates("models/templates/llama-cpp-lfm2.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
+        auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja", detailed_debug);
+        tst.test("</think>Hello, world!\nWhat's up?").enable_thinking(true).
+            reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
+            expect(message_assist).run();
+        tst.test("I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_thoughts)
+            .run();
+        tst.test(
+               "<｜tool▁calls▁begin｜><｜tool▁call▁begin｜>function<｜tool▁sep｜>special_function\n"
+               "```json\n{\"arg1\": 1}```<｜tool▁call▁end｜><｜tool▁calls▁end｜>")
+            .tools({ special_function_tool })
+            .enable_thinking(false)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .expect(message_assist_call)
+            .run();
+    }
 
-        auto inputs_tools_forced_json_schema = std::invoke([&]() -> common_chat_templates_inputs {
-            common_chat_templates_inputs inputs;
-            inputs.messages = {
-                std::invoke([&]() -> common_chat_msg {
-                    common_chat_msg msg;
-                    msg.role = "system";
-                    msg.content = "force json schema.\n";
-                    return msg;
-                }),
-                message_user,
-            };
-            inputs.tools = {special_function_tool};
-            return inputs;
-        });
+    // MiMo-VL / Hermes 3 / Qwen 2.5 (Common <tool_call> JSON format)
+    for (const auto & path :
+         { "models/templates/MiMo-VL.jinja", "models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja",
+           "models/templates/Qwen-Qwen2.5-7B-Instruct.jinja" }) {
+        auto tst = peg_tester(path, detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("<tool_call>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}\n</tool_call>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+    }
 
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_no_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-        }
+    // Apriel 1.5
+    {
+        auto tst = peg_tester("models/templates/unsloth-Apriel-1.5.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        tst.test("<tool_calls>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]</tool_calls>")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+    }
 
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools);
-            assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params.format);
-            assert_equals(false, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(true, params.grammar.empty());
-        }
+    // Apriel 1.6 Thinker (reasoning-only support)
+    {
+        auto tst = peg_tester("models/templates/Apriel-1.6-15b-Thinker-fixed.jinja", detailed_debug);
+
+        // Implicit reasoning start (forced open)
+        tst.test("I'm\nthinking\n[BEGIN FINAL RESPONSE]\nHello, world!\nWhat's up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .expect(simple_assist_msg("Hello, world!\nWhat's up?", "Here are my reasoning steps:\nI'm\nthinking"))
+            .expect_reconstruction()
+            .run();
+
+        // Reasoning + Tool calls
+        tst.test(
+               "I'm\nthinking\n[BEGIN FINAL RESPONSE]\n<tool_calls>[{\"name\": \"special_function\", \"arguments\": "
+               "{\"arg1\": 1}}]</tool_calls>")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .enable_thinking(true)
+            .tools({ special_function_tool })
+            .expect(simple_assist_msg("", "Here are my reasoning steps:\nI'm\nthinking", "special_function", "{\"arg1\":1}"))
+            .run();
+    }
 
-        {
-            auto params = common_chat_templates_apply(tmpls.get(), inputs_tools_forced_json_schema);
-            assert_equals(COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS, params.format);
-            assert_equals(true, params.grammar_lazy);
-            assert_equals(std::string(R"(<|im_start|>system
-List of tools: <|tool_list_start|>[{"type": "function", "function": {"name": "special_function", "description": "I'm special", "parameters": {"type": "object", "properties": {"arg1": {"type": "integer", "description": "The arg."}}, "required": ["arg1"]}}}]<|tool_list_end|><|im_end|>
-<|im_start|>user
-Hey there!<|im_end|>
-<|im_start|>assistant
-)"), params.prompt);
-            assert_equals(false, params.grammar.empty());
-        }
+    // Mistral Small 3.2 - FUNC_BRACKET_TAG format: [TOOL_CALLS]func_name[CALL_ID]id[ARGS]{...}
+    {
+        auto tst = peg_tester("models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("[TOOL_CALLS]special_function[CALL_ID]123456789[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_id)
+            .expect_reconstruction()
+            .run();
+
+        tst.test("[TOOL_CALLS]special_function[CALL_ID]000000001[ARGS]{\"arg1\": 1}"
+            "[TOOL_CALLS]special_function_with_opt[CALL_ID]000000002[ARGS]{\"arg1\": 1, \"arg2\": 2}")
+            .parallel_tool_calls(true)
+            .tools({
+                special_function_tool, special_function_tool_with_optional_param
+            })
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 1})", "000000001" },
+                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", "000000002" },
+            })
+            .expect_reconstruction()
+            .run();
 
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test single tool call with JSON format
-        common_chat_msg msg_single_tool_call;
-        msg_single_tool_call.role = "assistant";
-        msg_single_tool_call.tool_calls.push_back({"special_function", "{\"arg1\":1}", ""});
-        assert_msg_equals(
-            msg_single_tool_call,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with string argument
-        common_chat_msg msg_tool_call_string;
-        msg_tool_call_string.role = "assistant";
-        msg_tool_call_string.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_string,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with multiple arguments
-        common_chat_msg msg_multi_args;
-        msg_multi_args.role = "assistant";
-        msg_multi_args.tool_calls.push_back({"calculate", "{\"x\":10,\"y\":20,\"operation\":\"add\"}", ""});
-        assert_msg_equals(
-            msg_multi_args,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"calculate\", \"arguments\": {\"x\": 10, \"y\": 20, \"operation\": \"add\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test multiple tool calls in single array
-        common_chat_msg msg_multiple_tools;
-        msg_multiple_tools.role = "assistant";
-        msg_multiple_tools.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        msg_multiple_tools.tool_calls.push_back({"get_time", "{\"timezone\":\"UTC\"}", ""});
-        assert_msg_equals(
-            msg_multiple_tools,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}, {\"name\": \"get_time\", \"arguments\": {\"timezone\": \"UTC\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content before
-        common_chat_msg msg_content_before_tool;
-        msg_content_before_tool.role = "assistant";
-        msg_content_before_tool.content = "Let me check the weather for you.";
-        msg_content_before_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_before_tool,
-            test_chat_parse(
-                "Let me check the weather for you.<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with content after
-        common_chat_msg msg_content_after_tool;
-        msg_content_after_tool.role = "assistant";
-        msg_content_after_tool.content = "Here's the result.";
-        msg_content_after_tool.tool_calls.push_back({"get_weather", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_content_after_tool,
-            test_chat_parse(
-                "<|tool_call_start|>[{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}]<|tool_call_end|>Here's the result.",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Test tool call with newlines (common in LLM output)
-        common_chat_msg msg_tool_call_newlines;
-        msg_tool_call_newlines.role = "assistant";
-        msg_tool_call_newlines.tool_calls.push_back({"get_current_time", "{\"location\":\"Paris\"}", ""});
-        assert_msg_equals(
-            msg_tool_call_newlines,
-            test_chat_parse(
-                "<|tool_call_start|>[{\n    \"name\": \"get_current_time\",\n    \"arguments\": {\n        \"location\": \"Paris\"\n    }\n}]<|tool_call_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS}));
-
-        // Note: LFM2 uses JSON format for tool calls: [{"name": "...", "arguments": {...}}]
-        // Unlike other formats, LFM2 template does not render tool calls in conversation history,
-        // so we don't use test_templates() for tool call generation. Instead, the parsing tests
-        // above verify edge cases and format variations for the tool call output format.
-    }
 
+    }
+    // Devstral
     {
-        auto tmpls = read_templates("models/templates/MiniMax-M2.jinja");
-        std::vector<std::string> end_tokens{ "[e~[" };
-
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_MINIMAX_M2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_MINIMAX_M2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<minimax:tool_call><invoke name=\"special_function\"><parameter name=\"arg1\">1</parameter></invoke></minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_MINIMAX_M2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<minimax:tool_call>\n<invoke name=\"special_function_with_opt\">\n<parameter name=\"arg1\">1</parameter>\n<parameter name=\"arg2\">2</parameter>\n</invoke>\n</minimax:tool_call>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_NONE,
-                      /* ignore_whitespace_differences= */ true
-        );
+        auto tst = peg_tester("models/templates/unsloth-mistral-Devstral-Small-2507.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+        tst.test("Hello, world!\nWhat's up?[TOOL_CALLS]special_function[ARGS]{\"arg1\": 1}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .expect_reconstruction()
+            .run();
     }
 
     {
-        auto tmpls = read_templates("models/templates/GLM-4.6.jinja");
-        std::vector<std::string>   end_tokens{ "<|assistant|>", "<|observation|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_GLM_4_5, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\nHello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }), true);
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}), true);
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_GLM_4_5}
-            ), true);
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }), true);
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "\n<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "\n<think>I'm\nthinking</think>\n\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-            test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<tool_call>complex_function\n"
-                "<arg_key>name</arg_key>\n"
-                "<arg_value>John Doe</arg_value>\n"
-                "<arg_key>age</arg_key>\n"
-                "<arg_value>30</arg_value>\n"
-                "<arg_key>active</arg_key>\n"
-                "<arg_value>true</arg_value>\n"
-                "<arg_key>score</arg_key>\n"
-                "<arg_value>95.5</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<tool_call>web_search\n"
-                "<arg_key>query</arg_key>\n"
-                "<arg_value>\"From Zero\" Linkin Park album tracklist complete songs</arg_value>\n"
-                "<arg_key>limit</arg_key>\n"
-                "<arg_value>3</arg_value>\n"
-                "<arg_key>type</arg_key>\n"
-                "<arg_value>text</arg_value>\n"
-                "</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_GLM_4_5}); });
-
-        // Test interleaved thinking
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\n\nWhat's up?", "I'm\nthinkingThinking2", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?", "", "special_function", "{\"arg1\": 1}"),
-            "\n<think>I'm\nthinking</think>Hello, world!\n<think>Thinking2</think>What's up?\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_GLM_4_5,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "\n<think></think>\nHello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "\n<think></think>\n<tool_call>special_function\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "\n<think></think>\n<tool_call>special_function_with_opt\n<arg_key>arg1</arg_key>\n<arg_value>1</arg_value>\n<arg_key>arg2</arg_key>\n<arg_value>2</arg_value>\n</tool_call>\n",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ false,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        // Llama 3.1
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
     }
 
     {
-        auto tmpls = read_templates("models/templates/Kimi-K2-Thinking.jinja");
-        std::vector<std::string> end_tokens{ "<|im_end|>" };
-
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
-        assert_equals(COMMON_CHAT_FORMAT_KIMI_K2, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Test parsing regular content
-        assert_msg_equals(message_assist,
-            test_chat_parse(
-                "Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing content with thinking
-        assert_msg_equals(message_assist_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /* .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                }));
-
-        // Test parsing tool calls
-        assert_msg_equals(message_assist_call,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}));
-
-        // Test parsing tool calls with thinking
-        assert_msg_equals(message_assist_call_thoughts,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test tool calls with extra content
-        assert_msg_equals(message_assist_call_content,
-            test_chat_parse(
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {COMMON_CHAT_FORMAT_KIMI_K2}
-            ));
-
-        // Test tool calls with extra content AND thinking
-        assert_msg_equals(message_assist_call_thoughts_content,
-            test_chat_parse(
-                "<think>I'm\nthinking</think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>Hello, world!\nWhat's up?",
-                /* is_partial= */ false,
-                {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-                }));
-
-        // Test streaming
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_unparsed,
-            "<think>I'm\nthinking</think>\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(message_assist_call_thoughts_content,
-            "<think>I'm\nthinking\n</think>\n\nHello, world!\nWhat's up?\n\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>\n",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(message_assist_call_withopt,
-            "<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_NONE
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": \"123456\"}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": \"123456\"}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": [1, 2, \"345\", 6]}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": [1, 2, \"345\", 6]}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}"),
-            "<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n<|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": {\"12\": 34, \"5\": [67, 8], \"9\": \"10\"}}<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    /*  .format = */ COMMON_CHAT_FORMAT_KIMI_K2,
-                    /*  .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:0<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"),
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        test_parser_with_streaming(
-                simple_assist_msg(
-                        "Let me start by examining the relevant files to understand the current implementation.", "",
-                        "read_file",
-                        "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}"),
-                "Let me start by examining the relevant files to understand the current implementation."
-                "<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {COMMON_CHAT_FORMAT_KIMI_K2}); });
-        auto multi_tool_msg = simple_assist_msg("Let me call multiple tools.", "I'm thinking.");
-        multi_tool_msg.tool_calls.push_back({ "read_file", "{\"files\": [{\"path\": \"src/app/Partners.tsx\", \"line_ranges\": [\"1-100\"]}]}", "" });
-        multi_tool_msg.tool_calls.push_back({ "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}", "" });
-        multi_tool_msg.tool_calls.push_back({ "complex_function", "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}", "" });
-        multi_tool_msg.tool_calls.push_back({ "emoji_function", "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}", "" });
-        test_parser_with_streaming(multi_tool_msg,
-                "<think>I'm thinking.</think>Let me call multiple tools."
-                "<|tool_calls_section_begin|>"
-                "<|tool_call_begin|>functions.read_file:0<|tool_call_argument_begin|>"
-                "{\"files\":[{\"path\":\"src/app/Partners.tsx\",\"line_ranges\":[\"1-100\"]}]}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>"
-                "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.complex_function:2<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|>"
-                "<|tool_call_begin|>functions.emoji_function:3<|tool_call_argument_begin|>"
-                "{\"message\":\"Hello! 👋 🌟 🚀 Testing emojis: 😀😃😄😁 and symbols: ∑∏∆∇\"}"
-                "<|tool_call_end|>"
-                "<|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("", "I'm thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-        test_parser_with_streaming(
-                simple_assist_msg("Hello", "I'm thinkingI'm still thinking", "complex_function_in_think", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"),
-                "<think>I'm thinking<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function_in_think:0<|tool_call_argument_begin|>"
-                "{\"name\": \"John Doe\", \"age\": 30, \"active\": true, \"score\": 95.5}"
-                "<|tool_call_end|><|tool_calls_section_end|>I'm still thinking</think>Hello",
-            [&](const std::string &msg) { return test_chat_parse(msg, /* is_partial= */ true, {
-                    COMMON_CHAT_FORMAT_KIMI_K2,
-                    COMMON_REASONING_FORMAT_DEEPSEEK
-            }); });
-
-        // Test template rendering
-        common_chat_templates_inputs conversation_with_tools = inputs_tools;
-        conversation_with_tools.messages.push_back(simple_assist_msg("Let's do it", "Think first", "complex_function", "{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 1",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "complex_function",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("Continue", "Think next", "web_search", "{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 2",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "web_search",
-            /* .tool_call_id = */ "",
-        });
-        conversation_with_tools.messages.push_back(simple_assist_msg("CC", "Think last", "read_file", "{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}"));
-        conversation_with_tools.messages.push_back({
-            "tool",
-            "Tool response 3",
-            /* .content_parts = */ {},
-            /* .tool_calls = */ {},
-            /* .reasoning_content = */ "",
-            /* .tool_name = */ "read_file",
-            /* .tool_call_id = */ "",
-        });
-        assert_equals(common_chat_templates_apply(tmpls.get(), conversation_with_tools).prompt, std::string("<|im_system|>tool_declare<|im_middle|>[{\"type\": \"function\", \"function\": {\"name\": \"special_function\", \"description\": \"I'm special\", \"parameters\": {\"type\": \"object\", \"properties\": {\"arg1\": {\"type\": \"integer\", \"description\": \"The arg.\"}}, \"required\": [\"arg1\"]}}}]<|im_end|><|im_system|>system<|im_middle|>You are Kimi, an AI assistant created by Moonshot AI.<|im_end|><|im_user|>user<|im_middle|>Hey there!<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think first</think>Let's do it<|tool_calls_section_begin|><|tool_call_begin|>functions.complex_function:0<|tool_call_argument_begin|>{\"name\":\"John Doe\",\"age\":30,\"active\":true,\"score\":95.5}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>complex_function<|im_middle|>## Return of functions.complex_function:0\nTool response 1<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think next</think>Continue<|tool_calls_section_begin|><|tool_call_begin|>functions.web_search:1<|tool_call_argument_begin|>{\"query\":\"\\\"From Zero\\\" Linkin Park album tracklist complete songs\",\"limit\":3,\"type\":\"text\"}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>web_search<|im_middle|>## Return of functions.web_search:1\nTool response 2<|im_end|><|im_assistant|>assistant<|im_middle|><think>Think last</think>CC<|tool_calls_section_begin|><|tool_call_begin|>functions.read_file:2<|tool_call_argument_begin|>{\"args\": [{\"path\": \"src/providers/ThemeProvider.tsx\"}, {\"path\": \"src/components/Header.tsx\"}, {\"path\": \"src/components/ThemeToggle.tsx\"}, {\"path\": \"src/app/globals.css\"}, {\"path\": \"src/app/layout.tsx\"}]}<|tool_call_end|><|tool_calls_section_end|><|im_end|><|im_system|>read_file<|im_middle|>## Return of functions.read_file:2\nTool response 3<|im_end|><|im_assistant|>assistant<|im_middle|>"));
-
-        // Test template generation for regular content
-        test_templates(tmpls.get(), end_tokens, message_assist, tools,
-                      "<think></think>Hello, world!\nWhat's up?",
-                      /* expect_grammar_triggered= */ false);
-
-        // Test template generation for tool calls
-        test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-
-        // Test template generation for tools with optional parameters
-        test_templates(tmpls.get(), end_tokens, message_assist_call_noopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
-        test_templates(tmpls.get(), end_tokens, message_assist_call_withopt, tools,
-                      "<think></think><|tool_calls_section_begin|><|tool_call_begin|>functions.special_function_with_opt:0<|tool_call_argument_begin|>{\"arg1\": 1, \"arg2\": 2}<|tool_call_end|><|tool_calls_section_end|>",
-                      /* expect_grammar_triggered= */ true,
-                      /* test_grammar_if_triggered= */ true,
-                      /* common_reasoning_format= */ COMMON_REASONING_FORMAT_DEEPSEEK,
-                      /* ignore_whitespace_differences= */ true
-        );
+        // Llama 3.2
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
     }
 
     {
-        // Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
-        // but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
-        // schema-aware parameter parsing.
-        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-        assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
-
-        // Grammar and PEG parser should be generated with thinking_forced_open
-        {
-            common_chat_templates_inputs inputs;
-            inputs.messages = { message_user };
-            inputs.tools = { special_function_tool };
-            auto params = common_chat_templates_apply(tmpls.get(), inputs);
-            assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
-            assert_equals(true, params.thinking_forced_open);
-            assert_equals(false, params.grammar.empty());
-            assert_equals(false, params.parser.empty());
-            auto grammar = build_grammar(params.grammar);
-            GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar");
-        }
+        // Llama 3.3
+        auto tst = peg_tester("models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").tools({ python_tool }).expect(message_assist).expect_reconstruction().run();
     }
-}
-
-static void test_template_output_peg_parsers() {
-    printf("[%s]\n", __func__);
-
-    // JSON schemas
-    const char * invoice_schema = R"({
-        "type": "object",
-        "properties": {
-            "amount": {"type": "number"},
-            "date": {"type": "string"}
-        }
-    })";
 
+    // GPT-OSS format tests
     {
-        // Ministral-3-14B-Reasoning-2512
-        auto tmpls = read_templates("models/templates/mistralai-Ministral-3-14B-Reasoning-2512.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.expect.content = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]Hello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I'm\nthinking[/THINK]"
-                      R"([TOOL_CALLS]special_function[ARGS]{"arg1":1})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"([TOOL_CALLS]special_function[ARGS]{"arg1": 1})"
-                      R"([TOOL_CALLS]special_function_with_opt[ARGS]{"arg1": 1, "arg2": 2})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "[THINK]I need to output the invoice details in JSON[/THINK]"
-                      "```json\n"
-                      R"({"amount": 123.45, "date": "2025-12-03"})"
-                      "\n```";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/openai-gpt-oss-120b.jinja", detailed_debug);
+
+        // Basic content only - final channel
+        tst.test("<|channel|>final<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Basic content only - commentary channel
+        tst.test("<|channel|>commentary<|message|>Hello, world!\nWhat's up?").expect(message_assist).run();
+
+        // Analysis channel (reasoning) with final channel (content)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // Analysis channel (reasoning) with final channel (content) with reasoning_format = none
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
+            .expect_content("<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
+            .run();
+
+        // Analysis channel only (partial) - still works when reasoning format is set
+        tst.test("<|channel|>analysis<|message|>I'm\nthinking")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .is_partial(true)
+            .expect_reasoning("I'm\nthinking")
+            .run();
+
+        // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with recipient in channel header: "<|channel|>analysis to=functions.NAME<|message|>JSON"
+        tst.test("<|channel|>analysis to=functions.special_function<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with constraint: " to=functions.NAME<|channel|>analysis <|constrain|>json<|message|>JSON"
+        tst.test(" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call in commentary channel (channel header variant)
+        tst.test("<|channel|>commentary to=functions.special_function<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .run();
+
+        // Tool call with reasoning + content (analysis first, then tool call)
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
+               "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({ special_function_tool })
+            .expect(message_assist_call_thoughts)
+            .run();
+
+        // Complex tool calling
+        tst.test(
+            "<|channel|>analysis<|message|>Thinking about edit...<|end|>"
+            "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
+            "<|message|>{\"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
+            )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .tools({
+                {
+                    /* .name = */ "edit",
+                    /* .description = */ "Edit a file",
+                    /* .parameters = */ R"({
+                        "type": "object",
+                        "properties": {
+                            "oldString": {
+                                "type": "string",
+                                "description": "Old string to replace."
+                            },
+                            "newString": {
+                                "type": "string",
+                                "description": "New replacement string."
+                            },
+                            "replaceAll": {
+                                "type": "boolean",
+                                "description": "Whether to replace all occurences."
+                            }
+                        },
+                        "required": ["oldString", "newString"]
+                    })",
+                }
+            })
+            .expect_reasoning("Thinking about edit...")
+            .expect_tool_calls({
+                { "edit", R"({"oldString": "if (part < railCount - 1) {", "newString": "if (part < 4) {", "replaceAll": false})", {} }
+            })
+            .run();
+
+        // Structured output
+        tst.test(
+            "<|channel|>analysis<|message|>I need to output the invoice details in JSON<|end|>"
+            "<|start|>assistant<|channel|>final <|constrain|>json"
+            "<|message|>"
+            R"({"amount": 123.45, "date": "2025-12-03"})"
+            )
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .json_schema(invoice_schema)
+            .expect_reasoning("I need to output the invoice details in JSON")
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
+            .run();
+
+
+        // Unsolicited tool calls. There is no good way to handle these, so we return empty content.
+
+        // Builtin function - recipient in role
+        tst.test(
+               "<|channel|>analysis<|message|>I will execute python to say hello<|end|>"
+               "<|start|>assistant to=container.exec<|channel|>commentary<|message|>python3 -c 'print(\"hello\")'")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("I will execute python to say hello")
+            .expect_content("")
+            .run();
+
+        // Builtin function - recipient in channel
+        tst.test(
+               "<|channel|>analysis<|message|>I will execute python to say hello<|end|>"
+               "<|start|>assistant<|channel|>commentary to=python <|constrain|>code<|message|>print(\"hello\")")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect_reasoning("I will execute python to say hello")
+            .expect_content("")
+            .run();
+
+        // Edge cases
+
+        // "<|channel|>commentary to=assistant" before reasoning
+        tst.test(
+               "<|channel|>commentary to=assistant<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
+
+        // "<|channel|>commentary to=assistant" before final message
+        tst.test(
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary to=assistant<|channel|>final<|message|>Hello, world!\nWhat's "
+               "up?")
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+            .expect(message_assist_thoughts)
+            .run();
     }
 
     {
-        // Qwen3-Coder
-        auto tmpls = read_templates("models/templates/Qwen3-Coder.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with JSON parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=todo_list>\n"
-                "<parameter=todos>\n"
-                "[{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {todo_list_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "todo_list",
-                /* .arguments = */ "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/StepFun3.5-Flash.jinja", detailed_debug);
+        tst.test("I was thinking</think>\nNow I'm not.").
+            enable_thinking(true).
+            reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK).
+            expect_reasoning("I was thinking").
+            expect_content("Now I'm not.")
+        .run();
+
+        // Test that numeric-looking string values are coerced to strings per the schema
+        tst.test(
+               "Let me call the magic tool\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic>\n"
+               "<parameter=name>\nfooBar\n</parameter>\n"
+               "<parameter=ref>\n5123123\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_tool })
+            .expect_reasoning("Let me call the magic tool")
+            .expect_tool_calls({
+                { "magic", R"({"name": "fooBar", "ref": "5123123"})", {} },
+            })
+            .run();
+
+        // Test that numeric values are correctly interpreted as numbers when schema calls for number
+        tst.test(
+               "Let me call the special function\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=special_function>\n"
+               "<parameter=arg1>\n42555916\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool })
+            .expect_reasoning("Let me call the special function")
+            .expect_tool_calls({
+                { "special_function", R"({"arg1": 42555916})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Let me call the special function with opt\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=special_function_with_opt>\n"
+               "<parameter=arg1>\n42555916\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ special_function_tool_with_optional_param })
+            .expect_reasoning("Let me call the special function with opt")
+            .expect_tool_calls({
+                { "special_function_with_opt", R"({"arg1": 42555916})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Let me call the magic_int function\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic_int>\n"
+               "<parameter=ref>\n42555916\n</parameter>\n"
+               "<parameter=name>\nbaz\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Let me call the magic_int function")
+            .expect_tool_calls({
+                { "magic_int", R"({"ref": 42555916, "name": "baz"})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Call string_param with empty text\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=string_param>\n"
+               "<parameter=text>\n\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ string_param_tool })
+            .expect_reasoning("Call string_param with empty text")
+            .expect_tool_calls({
+                { "string_param", R"({"text": ""})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Test simple quoted unquoted\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=quoted_unquoted>\n"
+               "<parameter=quoted>\n\"foo\"\n</parameter>\n"
+               "<parameter=unquoted>\nfoo\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ quoted_unquoted_tool })
+            .expect_reasoning("Test simple quoted unquoted")
+            .expect_tool_calls({
+                { "quoted_unquoted", R"({"quoted": "\"foo\"", "unquoted": "foo"})", {} },
+            })
+            .run();
+
+        tst.test(
+               "Test complex quoted unquoted\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=quoted_unquoted>\n"
+               "<parameter=quoted>\n\"printf(\\\"foo\\\");\"\n</parameter>\n"
+               "<parameter=unquoted>\nprintf(\"foo\");\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ quoted_unquoted_tool })
+            .expect_reasoning("Test complex quoted unquoted")
+            .expect_tool_calls({
+                { "quoted_unquoted", R"({ "quoted" : "\"printf(\\\"foo\\\");\"", "unquoted": "printf(\"foo\");" })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test negative number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=magic_int>\n"
+               "<parameter=ref>\n-14\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ magic_int_tool })
+            .expect_reasoning("Test negative number")
+            .expect_tool_calls({
+                { "magic_int", R"({ "ref" : -14 })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test decimal number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=amount>\n"
+               "<parameter=orig>\n3.14\n</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ amount_tool })
+            .expect_reasoning("Test decimal number")
+            .expect_tool_calls({
+                { "amount", R"({ "orig" : 3.14 })", {} }
+            })
+            .run();
+
+            tst.test(
+               "Test imaginary number\n"
+               "</think>\n"
+               "<tool_call>\n"
+               "<function=imaginary_number>\n"
+               "<parameter=number>\n"
+               "{ \"real\": 3.14, \"imaginary\": 2.71 }\n"
+               "</parameter>\n"
+               "</function>\n"
+               "</tool_call>")
+            .enable_thinking(true)
+            .reasoning_format(COMMON_REASONING_FORMAT_DEEPSEEK)
+            .tools({ imaginary_number_tool })
+            .expect_reasoning("Test imaginary number")
+            .expect_tool_calls({
+                { "imaginary_number", R"({ "number" : {"real":3.14,"imaginary":2.71 } })", {} }
+            })
+            .run();
+
     }
 
+    // GigaChat V3
     {
-        // NVIDIA Nemotron-3 Nano
-        auto tmpls = read_templates("models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning with reasoning_format = none
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.expect.content = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-        });
-
-        // Test basic message and reasoning with reasoning_format = auto
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.enable_thinking = true;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.enable_thinking = false;
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/GigaChat3-10B-A1.8B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("<|message_sep|>\n\nfunction call<|role_sep|>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+
+        tst.test(
+            "Hello, world!\nWhat's up?"
+            "<|message_sep|>\n\nfunction call<|role_sep|>\n{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}"
+        )
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .expect_reconstruction()
+            .run();
     }
 
+    // GigaChat V3.1
     {
-        // Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open)
-        // Unlike Nemotron, Step-3.5-Flash always emits <think> regardless of enable_thinking,
-        // so all inputs must include a </think> delimiter.
-        auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
-
-        // Test basic message with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "</think>\nHello, world!\nWhat's up?";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-
-            t.expect = message_assist;
-        });
-
-        // Test tool call without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call;
-        });
-
-        // Test tool call with thinking
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {special_function_tool};
-
-            t.expect = message_assist_call_thoughts;
-        });
-
-        // Test parallel tool calls with thinking
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "I'm\nthinking\n</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test parallel tool calls without thinking content
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=special_function>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>\n"
-                "<tool_call>\n"
-                "<function=special_function_with_opt>\n"
-                "<parameter=arg1>\n"
-                "1\n"
-                "</parameter>\n"
-                "<parameter=arg2>\n"
-                "2\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        {},
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with code string parameter
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</parameter>\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test tool call with string parameter and no closing </parameter> tag
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-                "</think>\n"
-                "<tool_call>\n"
-                "<function=python>\n"
-                "<parameter=code>\n"
-                "def hello():\n"
-                "    print(\"Hello, world!\")\n"
-                "\n"
-                "hello()\n"
-                "</function>\n"
-                "</tool_call>";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.tools = {python_tool};
-
-            t.expect.tool_calls = {{
-                /* .name = */      "python",
-                /* .arguments = */ "{\"code\": \"def hello():\\n    print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
-                /* .id = */        {},
-            }};
-        });
-
-        // Test response format (JSON schema with thinking)
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input =
-              "I need to output the invoice details in JSON\n"
-              "</think>\n"
-              R"({"amount": 123.45, "date": "2025-12-03"})";
-            t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        auto tst = peg_tester("models/templates/GigaChat3.1-10B-A1.8B.jinja", detailed_debug);
+        tst.test("Hello, world!\nWhat's up?").expect(message_assist).expect_reconstruction().run();
+        tst.test("<|function_call|>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}")
+            .tools({ special_function_tool })
+            .expect(message_assist_call)
+            .expect_reconstruction()
+            .run();
+
+        tst.test(
+            "Hello, world!\nWhat's up?"
+            "<|function_call|>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}"
+        )
+            .tools({ special_function_tool })
+            .expect(message_assist_call_content)
+            .expect_reconstruction()
+            .run();
     }
+}
 
+// Test the developer role to system workaround with a simple mock template
+static void test_developer_role_to_system_workaround() {
+    LOG_DBG("%s\n", __func__);
+
+    // Simple mock template that supports system role
+    const std::string mock_template =
+        "{%- for message in messages -%}\n"
+        "  {{- '<|' + message.role + '|>' + message.content + '<|end|>' -}}\n"
+        "{%- endfor -%}\n"
+        "{%- if add_generation_prompt -%}\n"
+        "  {{- '<|assistant|>' -}}\n"
+        "{%- endif -%}";
+
+    auto tmpls = common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, mock_template));
+
+    // Test case 1: Developer message - should be changed to system
+    // After simplification we only test this case
     {
-        // Solar-Open-100B
-        auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
-
-        // Test basic message
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist;
-        });
-
-        // Test basic message and reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|><|begin|>assistant<|content|>Hello, world!\nWhat's up?";
-            t.expect = message_assist_thoughts;
-        });
-
-        // Test basic message and reasoning_effort = low
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>Hello, world!\nWhat's up?";
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist;
-        });
-
-        // Test tool call
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>123456789"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_call_id;
-        });
-
-        // Test tool call with reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call with reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.expect = message_assist_thoughts_call_idx;
-        });
-
-        // Test tool call without reasoning and tool_choice = required
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>";
-
-            t.params.tools = {special_function_tool};
-            t.params.tool_choice = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.expect = message_assist_call_idx;
-        });
-
-        // Test parallel tool calls
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I'm\nthinking<|end|>"
-                      "<|begin|>assistant<|tool_calls|>"
-                      "<|tool_call:begin|>0"
-                      "<|tool_call:name|>special_function"
-                      "<|tool_call:args|>{\"arg1\":1}"
-                      "<|tool_call:end|>"
-                      "<|tool_call:begin|>1"
-                      "<|tool_call:name|>special_function_with_opt"
-                      "<|tool_call:args|>{\"arg1\": 1, \"arg2\": 2}"
-                      "<|tool_call:end|>";
-
-            t.params.parallel_tool_calls = true;
-            t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
-
-            t.expect.reasoning_content = "I'm\nthinking";
-            t.expect.tool_calls = {{
-                /* .name = */      "special_function",
-                /* .arguments = */ R"({"arg1": 1})",
-                /* .id = */        "0",
-            }, {
-                /* .name = */      "special_function_with_opt",
-                /* .arguments = */ R"({"arg1": 1, "arg2": 2})",
-                /* .id = */        "1",
-            }};
-        });
-
-        // Test response format
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|think|>I need to output the invoice details in JSON<|end|>"
-                      "<|begin|>assistant<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.json_schema = invoice_schema;
-
-            t.expect.reasoning_content = "I need to output the invoice details in JSON";
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
-
-        // Test response format no reasoning
-        test_peg_parser(tmpls.get(), [&](auto & t) {
-            t.input = "<|content|>"
-                      R"({"amount": 123.45, "date": "2025-12-03"})";
-
-            t.params.chat_template_kwargs["reasoning_effort"] = "\"low\"";
-            t.params.json_schema = invoice_schema;
-
-            t.expect.content =R"({"amount": 123.45, "date": "2025-12-03"})";
-        });
+        common_chat_templates_inputs inputs;
+        common_chat_msg developer_msg;
+        developer_msg.role = "developer";
+        developer_msg.content = "You are a helpful developer assistant.";
+        inputs.messages = { developer_msg };
+        inputs.add_generation_prompt = false;
+
+        auto params = common_chat_templates_apply(tmpls.get(), inputs);
+
+        // The developer role should have been changed to system
+        if (params.prompt.find("<|developer|>") != std::string::npos) {
+            throw std::runtime_error("Test failed: developer role was not changed to system");
+        }
+        if (params.prompt.find("<|system|>You are a helpful developer assistant.<|end|>") == std::string::npos) {
+            throw std::runtime_error("Test failed: system message not found in output");
+        }
+        LOG_ERR("Test 1 passed: developer role changed to system\n");
     }
 }
 
 static void test_msg_diffs_compute() {
-    printf("[%s]\n", __func__);
+    LOG_DBG("%s\n", __func__);
     {
         common_chat_msg msg1;
 
@@ -3759,9 +4024,7 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = "Hello, world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg1;
@@ -3773,37 +4036,35 @@ static void test_msg_diffs_compute() {
         common_chat_msg_diff diff;
         diff.content_delta = " world!";
 
-        assert_equals(
-            {diff},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
 
         common_chat_msg msg1;
-        msg1.tool_calls = { { "special_function", "{\"ar", /* .id = */ "123" } };
+        msg1.tool_calls = {
+            { "special_function", "{\"ar", /* .id = */ "123" }
+        };
 
         common_chat_msg msg2;
-        msg2.tool_calls = { { "special_function", "{\"arg1\": 1}", /* .id = */ "123" } };
+        msg2.tool_calls = {
+            { "special_function", "{\"arg1\": 1}", /* .id = */ "123" }
+        };
 
         common_chat_msg_diff diff01;
-        diff01.tool_call_index = 0;
-        diff01.tool_call_delta.name = "special_function";
-        diff01.tool_call_delta.id = "123";
+        diff01.tool_call_index           = 0;
+        diff01.tool_call_delta.name      = "special_function";
+        diff01.tool_call_delta.id        = "123";
         diff01.tool_call_delta.arguments = "{\"ar";
 
-        assert_equals(
-            {diff01},
-            common_chat_msg_diff::compute_diffs(msg0, msg1));
+        assert_equals({ diff01 }, common_chat_msg_diff::compute_diffs(msg0, msg1));
 
         common_chat_msg_diff diff12;
-        diff12.tool_call_index = 0;
+        diff12.tool_call_index           = 0;
         // Note: neither id nor name change here.
         diff12.tool_call_delta.arguments = "g1\": 1}";
 
-        assert_equals(
-            {diff12},
-            common_chat_msg_diff::compute_diffs(msg1, msg2));
+        assert_equals({ diff12 }, common_chat_msg_diff::compute_diffs(msg1, msg2));
     }
     {
         common_chat_msg msg0;
@@ -3815,68 +4076,86 @@ static void test_msg_diffs_compute() {
         };
 
         common_chat_msg_diff diff1;
-        diff1.tool_call_index = 0;
-        diff1.tool_call_delta.name = "f1";
-        diff1.tool_call_delta.id = "123";
+        diff1.tool_call_index           = 0;
+        diff1.tool_call_delta.name      = "f1";
+        diff1.tool_call_delta.id        = "123";
         diff1.tool_call_delta.arguments = "{\"arg1\": 1}";
 
         common_chat_msg_diff diff2;
-        diff2.tool_call_index = 1;
-        diff2.tool_call_delta.name = "f2";
-        diff2.tool_call_delta.id = "222";
+        diff2.tool_call_index           = 1;
+        diff2.tool_call_delta.name      = "f2";
+        diff2.tool_call_delta.id        = "222";
         diff2.tool_call_delta.arguments = "{\"arg2\": 2}";
 
-        assert_equals(
-            {diff1, diff2},
-            common_chat_msg_diff::compute_diffs(msg0, msg2));
+        assert_equals({ diff1, diff2 }, common_chat_msg_diff::compute_diffs(msg0, msg2));
     }
 }
 
 int main(int argc, char ** argv) {
-    common_log_set_verbosity_thold(999);
+    bool detailed_debug    = false;
+    bool only_run_filtered = false;
+
+    // Check for --template flag
+    for (int i = 1; i < argc; i++) {
+        std::string arg = argv[i];
+        if (arg == "--template" && i + 1 < argc) {
+            g_template_filter = argv[++i];
+            // Only run PEG parser tests with the filter
+            only_run_filtered = true;
+        }
+        if (arg == "--detailed") {
+            detailed_debug = true;
+            common_log_set_verbosity_thold(999);
+        }
+        if (arg == "--force-reconstruction-test") {
+            g_force_reconstruction_test = true;
+            only_run_filtered          = true;
+        }
+    }
+
+    if (only_run_filtered) {
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All template tests passed!" << '\n';
+        return 0;
+    }
 
-    // try {
 #ifndef _WIN32
-        if (argc > 1) {
-            common_chat_templates_inputs inputs;
-            common_chat_msg msg;
-            msg.role = "user";
-            msg.content = "Hey";
-            inputs.messages = {msg};
-            inputs.tools = { special_function_tool };
-
-            std::cout << "| Template | Format |\n";
-            std::cout << "|----------|--------|\n";
-
-            for (int i = 1; i < argc; i++) {
-                try {
-                    std::string path = argv[i];
-                    if (path.rfind(".jinja") != path.size() - 6) {
-                        std::cerr << "Skipping non-jinja file: " << path << '\n';
-                        continue;
-                    }
-                    auto tmpls = read_templates(path);
-                    auto parts  = string_split(path, "/");
-                    auto name   = parts[parts.size() - 1];
-                    auto format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
-                    std::cout << "| " << name << " | " << format << " |\n";
-                } catch (const std::exception & e) {
-                    std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
+    if (argc > 1) {
+        common_chat_templates_inputs inputs;
+        common_chat_msg              msg;
+        msg.role        = "user";
+        msg.content     = "Hey";
+        inputs.messages = { msg };
+        inputs.tools    = { special_function_tool };
+
+        std::cout << "| Template | Format |\n";
+        std::cout << "|----------|--------|\n";
+
+        for (int i = 1; i < argc; i++) {
+            try {
+                std::string path = argv[i];
+                if (path.rfind(".jinja") != path.size() - 6) {
+                    std::cerr << "Skipping non-jinja file: " << path << '\n';
+                    continue;
                 }
+                auto         tmpls  = read_templates(path);
+                auto         parts  = string_split(path, "/");
+                const auto & name   = parts[parts.size() - 1];
+                const auto * format = common_chat_format_name(common_chat_templates_apply(tmpls.get(), inputs).format);
+                std::cout << "| " << name << " | " << format << " |\n";
+            } catch (const std::exception & e) {
+                std::cerr << "Failed to process " << argv[i] << ": " << e.what() << '\n';
             }
-        } else
-#endif
-        {
-            test_msg_diffs_compute();
-            test_msgs_oaicompat_json_conversion();
-            test_tools_oaicompat_json_conversion();
-            test_template_output_parsers();
-            test_template_output_peg_parsers();
-            std::cout << "\n[chat] All tests passed!" << '\n';
         }
-        return 0;
-    // } catch (const std::exception & e) {
-    //     std::cerr << "Error: " << e.what() << '\n';
-    //     return 1;
-    // }
+    } else
+#endif
+    {
+        test_msg_diffs_compute();
+        test_msgs_oaicompat_json_conversion();
+        test_tools_oaicompat_json_conversion();
+        test_developer_role_to_system_workaround();
+        test_template_output_peg_parsers(detailed_debug);
+        std::cout << "\n[chat] All tests passed!" << '\n';
+    }
+    return 0;
 }
diff --git a/tests/test-jinja.cpp b/tests/test-jinja.cpp
index 05ea8ca9e9..4b490dc5d8 100644
--- a/tests/test-jinja.cpp
+++ b/tests/test-jinja.cpp
@@ -387,6 +387,24 @@ static void test_expressions(testing & t) {
         "Bob"
     );
 
+    test_template(t, "empty computed member defaults to undefined",
+        "{{ a[]|default('fallback') }}",
+        {{"a", {{"name", "Bob"}}}},
+        "fallback"
+    );
+
+    test_template(t, "empty computed member is undefined",
+        "{{ a[] is undefined }}",
+        {{"a", {{"name", "Bob"}}}},
+        "True"
+    );
+
+    test_template(t, "undefined computed member is undefined",
+        "{{ a[undefined] is undefined }}",
+        {{"a", {{"name", "Bob"}}}},
+        "True"
+    );
+
     test_template(t, "array access",
         "{{ items[1] }}",
         {{"items", json::array({"a", "b", "c"})}},
@@ -429,6 +447,18 @@ static void test_expressions(testing & t) {
         "hello world"
     );
 
+    test_template(t, "string repetition",
+        "{{ 'ab' * 3 }}",
+        json::object(),
+        "ababab"
+    );
+
+    test_template(t, "reversed string repetition",
+        "{{ 3 * 'ab' }}",
+        json::object(),
+        "ababab"
+    );
+
     test_template(t, "ternary",
         "{{ 'yes' if cond else 'no' }}",
         {{"cond", true}},
@@ -505,6 +535,18 @@ static void test_filters(testing & t) {
         "hello"
     );
 
+    test_template(t, "upper array",
+        "{{ items|upper }}",
+        {{"items", json::array({"hello", "world"})}},
+        "['HELLO', 'WORLD']"
+    );
+
+    test_template(t, "upper dict",
+        "{{ items|upper }}",
+        {{"items", {{"hello", "world"}}}},
+        "{'HELLO': 'WORLD'}"
+    );
+
     test_template(t, "capitalize",
         "{{ 'heLlo World'|capitalize }}",
         json::object(),
@@ -663,6 +705,33 @@ static void test_filters(testing & t) {
         "\"\\u2713\""
     );
 
+    test_template(t, "tojson ensure_ascii=true nested object",
+        "{{ data|tojson(ensure_ascii=true) }}",
+        {{"data", {
+            {"text", "\u2713"},
+            {"items", json::array({"é", {{"snowman", "☃"}}})}
+        }}},
+        "{\"text\": \"\\u2713\", \"items\": [\"\\u00e9\", {\"snowman\": \"\\u2603\"}]}"
+    );
+
+    test_template(t, "tojson ensure_ascii=true indent=2",
+        "{{ data|tojson(ensure_ascii=true, indent=2) }}",
+        {{"data", {
+            {"text", "\u2713"},
+            {"nested", {{"accent", "é"}}}
+        }}},
+        "{\n  \"text\": \"\\u2713\",\n  \"nested\": {\n    \"accent\": \"\\u00e9\"\n  }\n}"
+    );
+
+    test_template(t, "tojson ensure_ascii=true preserves existing escapes",
+        "{{ data|tojson(ensure_ascii=true) }}",
+        {{"data", {
+            {"emoji", "😀"},
+            {"line", "a\nb"}
+        }}},
+        "{\"emoji\": \"\\ud83d\\ude00\", \"line\": \"a\\nb\"}"
+    );
+
     test_template(t, "tojson sort_keys=true",
         "{{ data|tojson(sort_keys=true) }}",
         {{"data", {{"b", 2}, {"a", 1}}}},
@@ -741,6 +810,12 @@ static void test_filters(testing & t) {
         "hello"
     );
 
+    test_template(t, "int filter on integer is identity",
+        "{{ value|int }}",
+        {{"value", 7}},
+        "7"
+    );
+
     test_template(t, "none to string",
         "{{ x|string }}",
         {{"x", nullptr}},
@@ -884,6 +959,24 @@ static void test_macros(testing & t) {
         json::object(),
         "Hi Guest"
     );
+
+    test_template(t, "macro kwargs input",
+        "{% macro my_func(a, b=False) %}{% if b %}{{ a }}{% else %}nope{% endif %}{% endmacro %}{{ my_func(1, b=True) }}",
+        json::object(),
+        "1"
+    );
+
+    test_template(t, "macro with multiple args",
+        "{% macro add(a, b, c=0) %}{{ a + b + c }}{% endmacro %}{{ add(1, 2) }},{{ add(1, 2, 3) }},{{ add(1, b=10) }},{{ add(1, 2, c=5) }}",
+        json::object(),
+        "3,6,11,8"
+    );
+
+    test_template(t, "macro with kwarg out-of-order input",
+        "{% macro greet(first, last, greeting='Hello') %}{{ greeting }}, {{ first }} {{ last }}{% endmacro %}{{ greet(last='Smith', first='John') }},{{ greet(last='Doe', greeting='Hi', first='Jane') }}",
+        json::object(),
+        "Hello, John Smith,Hi, Jane Doe"
+    );
 }
 
 static void test_namespace(testing & t) {
@@ -2248,6 +2341,7 @@ static void test_fuzzing(testing & t) {
 
     t.test("malformed templates (should error, not crash)", [&](testing & t) {
         const std::vector<std::string> malformed = {
+            "",
             "{{ x",
             "{% if %}",
             "{% for %}",
@@ -2268,6 +2362,11 @@ static void test_fuzzing(testing & t) {
         for (const auto & tmpl : malformed) {
             t.assert_true("malformed: " + tmpl, fuzz_test_template(tmpl, json::object()));
         }
+        std::string tmpl = "{% for message in messages %}{{ message.role | string }} : {{ message.content if ('content' in message and message.content is not none) }}{% endfor %";
+        while (tmpl.length() > 0) {
+            t.assert_true("malformed: " + tmpl, fuzz_test_template(tmpl, json::object()));
+            tmpl.pop_back();
+        }
     });
 
     t.test("type coercion edge cases", [&](testing & t) {
@@ -2388,4 +2487,12 @@ static void test_fuzzing(testing & t) {
             t.assert_true("builtin " + type_name + "." + fn_name + " #" + std::to_string(i), fuzz_test_template(tmpl, vars));
         }
     });
+
+    t.test("tojson ensure_ascii=true with invalid utf-8", [&](testing & t) {
+        t.assert_true("invalid utf-8 does not crash",
+            fuzz_test_template(
+                "{{ data|tojson(ensure_ascii=true) }}",
+                {{"data", std::string("hello\xfe\xffworld")}}
+            ));
+    });
 }
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp
index 8d8f4aeb2f..22a0fc9e60 100755
--- a/tests/test-json-schema-to-grammar.cpp
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -565,6 +565,104 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
         )"""
     });
 
+    test({
+        SUCCESS,
+        "array with empty items",
+        R"""({
+            "type": "array",
+            "items": {}
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            item ::= object
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= "[" space (item ("," space item)*)? "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "array with empty items and prefixItems",
+        R"""({
+            "type": "array",
+            "items": {},
+            "prefixItems": { "type": "string" }
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            item ::= object
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= "[" space (item ("," space item)*)? "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "array with empty items",
+        R"""({
+            "type": "array",
+            "items": {}
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            item ::= object
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= "[" space (item ("," space item)*)? "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "array with empty items and prefixItems",
+        R"""({
+            "type": "array",
+            "items": {},
+            "prefixItems": { "type": "string" }
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            item ::= object
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= "[" space (item ("," space item)*)? "]" space
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
     test({
         SUCCESS,
         "number",
@@ -1336,6 +1434,26 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
         )"""
     });
 
+    test({
+        SUCCESS,
+        "description only (no type) treated as unconstrained",
+        R"""({"description": "The 0-based index of the last line to be retrieved (inclusive). If None, read until the end of the file."})""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+            decimal-part ::= [0-9]{1,16}
+            integral-part ::= [0] | [1-9] [0-9]{0,15}
+            null ::= "null" space
+            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= value
+            space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            string ::= "\"" char* "\"" space
+            value ::= object | array | string | number | boolean | null
+        )"""
+    });
+
     test({
         SUCCESS,
         "literal string with escapes",
@@ -1452,6 +1570,47 @@ int main() {
         }
     });
 
+    // C++ only tests (features not yet supported in JS/Python implementations)
+    {
+        fprintf(stderr, "#\n# Testing C++ only features\n#\n");
+        auto run = [](const TestCase & tc) {
+            fprintf(stderr, "- %s\n", tc.name.c_str());
+            try {
+                tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
+                tc.verify_status(SUCCESS);
+            } catch (const std::invalid_argument & ex) {
+                fprintf(stderr, "Error: %s\n", ex.what());
+                tc.verify_status(FAILURE);
+            }
+        };
+
+        run({
+            SUCCESS,
+            "regexp with non-capturing group",
+            R"""({
+                "type": "string",
+                "pattern": "^(?:foo|bar)baz$"
+            })""",
+            R"""(
+                root ::= "\"" (("foo" | "bar") "baz") "\"" space
+                space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            )""",
+        });
+
+        run({
+            SUCCESS,
+            "regexp with nested non-capturing groups",
+            R"""({
+                "type": "string",
+                "pattern": "^(?:(?:ab)+c)?d$"
+            })""",
+            R"""(
+                root ::= "\"" ((("ab")+ "c")? "d") "\"" space
+                space ::= | " " | "\n"{1,2} [ \t]{0,20}
+            )""",
+        });
+    }
+
     if (getenv("LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR")) {
         fprintf(stderr, "\033[33mWARNING: Skipping slow tests on emulator.\n\033[0m");
     } else {
diff --git a/tests/test-peg-parser.cpp b/tests/test-peg-parser.cpp
index 220745d029..7d22d77612 100644
--- a/tests/test-peg-parser.cpp
+++ b/tests/test-peg-parser.cpp
@@ -20,6 +20,7 @@ int main(int argc, char *argv[]) {
     t.test("json", test_json_parser);
     t.test("gbnf", test_gbnf_generation);
     t.test("serialization", test_json_serialization);
+    t.test("python-dict", test_python_dict_parser);
 
     return t.summary();
 }
diff --git a/tests/test-reasoning-budget.cpp b/tests/test-reasoning-budget.cpp
new file mode 100644
index 0000000000..3028fb4d8f
--- /dev/null
+++ b/tests/test-reasoning-budget.cpp
@@ -0,0 +1,237 @@
+#include "reasoning-budget.h"
+#include "unicode.h"
+
+#include "llama.h"
+#include "ggml.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <cmath>
+#include <cstddef>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+// Reasoning budget sampler test helper
+// These tests use nullptr vocab which safely falls back to treating all tokens as complete
+// (The UTF-8 boundary detection logic is tested separately in test_utf8_boundary_detection)
+static void test_reasoning_budget(
+    const char * test_name,
+    const std::vector<llama_token> & sequence,
+    const std::vector<llama_token> & start_tokens,
+    const std::vector<llama_token> & end_tokens,
+    const std::vector<llama_token> & forced_tokens,
+    int32_t budget,
+    common_reasoning_budget_state initial_state,
+    size_t expected_force_start,   // token index where forcing should start (SIZE_MAX = never)
+    size_t expected_force_end      // token index where forcing should end (after this, no more forcing)
+) {
+    // Find the maximum token ID to ensure our vocab covers all tokens
+    llama_token max_token = 0;
+    for (auto t : sequence) max_token = std::max(max_token, t);
+    for (auto t : start_tokens) max_token = std::max(max_token, t);
+    for (auto t : end_tokens) max_token = std::max(max_token, t);
+    for (auto t : forced_tokens) max_token = std::max(max_token, t);
+
+    // Create a minimal sampler with mock vocabulary
+    // For this test, we use nullptr as vocab since we're testing state transitions
+    // The UTF-8 boundary check will treat all tokens as complete (safe fallback)
+    auto * sampler = common_reasoning_budget_init(
+        nullptr,  // vocab - not used for basic state machine tests
+        start_tokens,
+        end_tokens,
+        forced_tokens,
+        budget,
+        initial_state
+    );
+
+    // Create a test token data array for checking forcing behavior
+    // Vocab size must be large enough to include all tokens (start, end, forced, sequence)
+    std::vector<llama_token_data> cur;
+    const size_t n_vocab = (size_t)max_token + 1;
+    for (size_t i = 0; i < n_vocab; i++) {
+        cur.emplace_back(llama_token_data{(llama_token)i, logf((float)(i+1)), 0.0f});
+    }
+    llama_token_data_array cur_p = { cur.data(), cur.size(), -1, false };
+
+    size_t actual_force_start = SIZE_MAX;
+    size_t actual_force_end = SIZE_MAX;
+
+    // Feed the sequence and track when forcing occurs
+    for (size_t i = 0; i < sequence.size(); i++) {
+        // Check if we're in forcing state by applying and seeing if logits are modified
+        cur_p.selected = -1;
+        for (size_t j = 0; j < cur.size(); j++) {
+            cur[j].logit = logf((float)(j+1));  // reset logits
+        }
+
+        llama_sampler_apply(sampler, &cur_p);
+
+        // Check if forcing is active (all logits except one should be -INFINITY)
+        size_t finite_count = 0;
+        llama_token finite_token = -1;
+        for (size_t j = 0; j < cur.size(); j++) {
+            if (std::isfinite(cur[j].logit)) {
+                finite_count++;
+                finite_token = cur[j].id;
+            }
+        }
+
+        llama_sampler_accept(sampler, sequence[i]);
+
+        fprintf(stderr, "    i=%zu: token=%d, finite_count=%zu, finite_token=%d\n", i, (int)sequence[i], finite_count, (int)finite_token);
+
+        if (finite_count == 1) {
+            if (actual_force_start == SIZE_MAX) {
+                actual_force_start = i;
+            }
+            actual_force_end = i;
+        } else if (actual_force_start != SIZE_MAX && actual_force_end != SIZE_MAX) {
+            // Forcing stopped
+            break;
+        }
+    }
+
+    llama_sampler_free(sampler);
+
+    // Verify forcing occurred at expected positions
+    if (expected_force_start == SIZE_MAX) {
+        if (actual_force_start != SIZE_MAX) {
+            fprintf(stderr, "Test '%s' FAILED: Expected no forcing, but forcing occurred at %zu\n", test_name, actual_force_start);
+            GGML_ASSERT(false && "Expected no forcing, but forcing occurred");
+        }
+    } else {
+        if (actual_force_start == SIZE_MAX) {
+            fprintf(stderr, "Test '%s' FAILED: Expected forcing but none occurred\n", test_name);
+            GGML_ASSERT(false && "Expected forcing but none occurred");
+        }
+        if (actual_force_start != expected_force_start) {
+            fprintf(stderr, "Test '%s' FAILED: Forcing started at %zu, expected %zu\n", test_name, actual_force_start, expected_force_start);
+            GGML_ASSERT(false && "Forcing started at wrong position");
+        }
+    }
+
+    if (expected_force_end != SIZE_MAX) {
+        if (actual_force_end < expected_force_end) {
+            fprintf(stderr, "Test '%s' FAILED: Forcing ended at %zu, expected >= %zu\n", test_name, actual_force_end, expected_force_end);
+            GGML_ASSERT(false && "Forcing ended too early");
+        }
+    }
+
+    fprintf(stderr, "  Test '%s' passed (force_start=%zu, force_end=%zu)\n", test_name, actual_force_start, actual_force_end);
+    (void)sequence;
+}
+
+// UTF-8 boundary detection unit test
+// Tests common_utf8_is_complete() from reasoning-budget.h
+static void test_utf8_boundary_detection() {
+    // Complete sequences
+    GGML_ASSERT(common_utf8_is_complete("hello"));
+    GGML_ASSERT(common_utf8_is_complete(""));
+    GGML_ASSERT(common_utf8_is_complete("\xC2\xA0"));            // complete 2-byte UTF-8 (U+00A0)
+    GGML_ASSERT(common_utf8_is_complete("\xE2\x80\x9C"));        // complete 3-byte UTF-8 (left double quote)
+    GGML_ASSERT(common_utf8_is_complete("\xF0\x9F\x98\x80"));    // complete 4-byte UTF-8 (emoji)
+    GGML_ASSERT(common_utf8_is_complete("abc\xC3\xA9"));         // ASCII + complete 2-byte
+
+    // Incomplete sequences
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xC2", 1)));            // 2-byte start, missing continuation
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xE2\x80", 2)));        // 3-byte start + 1 cont, missing 1
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xE2", 1)));            // 3-byte start, missing 2
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xF0\x9F\x98", 3)));    // 4-byte start + 2 cont, missing 1
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xF0\x9F", 2)));        // 4-byte start + 1 cont, missing 2
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\xF0", 1)));            // 4-byte start, missing 3
+    GGML_ASSERT(!common_utf8_is_complete(std::string("\x80", 1)));            // orphan continuation byte
+
+    // Mixed: ASCII followed by start of multi-byte
+    GGML_ASSERT(!common_utf8_is_complete(std::string("hello\xC3", 6)));       // ASCII + incomplete 2-byte
+    GGML_ASSERT(common_utf8_is_complete(std::string("hello\xC3\xA9", 7)));    // ASCII + complete 2-byte
+}
+
+int main(void) {
+    // Reasoning budget sampler tests
+    printf("Testing reasoning budget sampler... ");
+
+    // Test 1: Basic budget with start/end tokens - no forcing (natural end before budget exhausted)
+    {
+        const std::vector<llama_token> start = {100};  // start token
+        const std::vector<llama_token> end = {101};    // end token
+        const std::vector<llama_token> forced = {102}; // forced token (not used in this test)
+        const std::vector<llama_token> sequence = {100, 50, 51, 101, 52}; // start, two tokens, end, one more
+
+        test_reasoning_budget("natural end before budget exhausted", sequence, start, end, forced,
+            5,      // budget of 5 tokens
+            REASONING_BUDGET_IDLE,
+            SIZE_MAX, SIZE_MAX); // no forcing expected (natural end)
+    }
+
+    // Test 2: Budget exhausted, forcing should occur
+    // Flow: i=0 apply()->passthrough, accept(100)->COUNTING; i=1 accept(50)->remaining=1
+    // i=2 accept(51)->remaining=0->FORCING; i=3 apply() forces token[0]; i=4 apply() forces token[1]
+    // At i=4, accept() advances force_pos to 2 which equals forced_tokens.size(), so state becomes DONE
+    {
+        const std::vector<llama_token> start = {100};
+        const std::vector<llama_token> end = {101};
+        const std::vector<llama_token> forced = {102, 101}; // forced message + end
+        const std::vector<llama_token> sequence = {100, 50, 51, 52, 53}; // start + 4 tokens (budget=2)
+
+        test_reasoning_budget("budget exhausted forcing", sequence, start, end, forced,
+            2,      // budget of 2 tokens
+            REASONING_BUDGET_IDLE,
+            3,      // forcing starts at i=3 (accept at i=2 depletes budget, apply at i=3 forces)
+            4);     // forcing continues through i=4 (accept at i=4 transitions to DONE)
+    }
+
+    // Test 3: Activate immediately with budget=0, forcing should start right away
+    // Flow: init promotes COUNTING+budget=0 to FORCING, so apply() sees FORCING at i=0
+    {
+        const std::vector<llama_token> start = {100};
+        const std::vector<llama_token> end = {101};
+        const std::vector<llama_token> forced = {102, 101};
+        const std::vector<llama_token> sequence = {100, 50, 51, 52}; // start token first, then 3 tokens
+
+        test_reasoning_budget("activate immediately budget=0", sequence, start, end, forced,
+            0,      // budget of 0 tokens
+            REASONING_BUDGET_COUNTING, // starts counting, promoted to FORCING since budget=0
+            0,      // forcing starts at i=0 (initialized in FORCING, apply forces immediately)
+            1);     // forcing continues through i=1 (accept at i=1 transitions to DONE)
+    }
+
+    // Test 4: No start/end tokens configured - passthrough (no forcing)
+    {
+        const std::vector<llama_token> start = {};
+        const std::vector<llama_token> end = {};
+        const std::vector<llama_token> forced = {102};
+        const std::vector<llama_token> sequence = {50, 51, 52, 53};
+
+        test_reasoning_budget("no start/end configured", sequence, start, end, forced,
+            2,      // budget
+            REASONING_BUDGET_IDLE,
+            SIZE_MAX, SIZE_MAX); // no forcing (no start/end configured)
+    }
+
+    // Test 5: Activate immediately with budget > 0, count down then force
+    // Flow: i=0 accept(50)->remaining=1, i=1 accept(51)->remaining=0->FORCING
+    // Forcing starts at i=2 (apply sees FORCING after accept at i=1 transitioned)
+    {
+        const std::vector<llama_token> start = {100};
+        const std::vector<llama_token> end = {101};
+        const std::vector<llama_token> forced = {102, 101};
+        const std::vector<llama_token> sequence = {50, 51, 52, 53};
+
+        test_reasoning_budget("activate immediately with budget", sequence, start, end, forced,
+            2,      // budget of 2 tokens
+            REASONING_BUDGET_COUNTING,
+            2,      // forcing starts at i=2 (after 2 accepts deplete budget, apply at i=2 forces)
+            3);     // forcing continues through i=3
+    }
+
+    printf("OK (5 tests passed)\n");
+
+    printf("Testing UTF-8 boundary detection... ");
+    test_utf8_boundary_detection();
+    printf("OK\n");
+
+    return 0;
+}