Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ add_library(${TARGET} STATIC
arg.cpp
arg.h
base64.hpp
chat-parser.cpp
chat-parser.h
chat-parser-xml-toolcall.h
chat-parser-xml-toolcall.cpp
chat-auto-parser-analyzer.cpp
chat-auto-parser-generator.cpp
chat-auto-parser-helpers.cpp
chat-auto-parser.h
chat-peg-parser.cpp
chat-peg-parser.h
chat.cpp
Expand Down
1,461 changes: 1,461 additions & 0 deletions common/chat-auto-parser-analyzer.cpp

Large diffs are not rendered by default.

250 changes: 250 additions & 0 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#include "chat-auto-parser-helpers.h"
#include "chat-auto-parser.h"
#include "chat-peg-parser.h"
#include "chat.h"
#include "json-schema-to-grammar.h"
#include "log.h"
#include "nlohmann/json.hpp"

#include <optional>

using json = nlohmann::ordered_json;

common_chat_params universal_peg_generator::generate_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs) {
common_chat_params data;

try {
LOG_DBG("%s\n", __func__);

// Patch messages if template requires non-null content
// Some templates (e.g., iquest) render null as "None" when concatenating strings
std::optional<json> messages_override;
if (analysis.tools.requires_nonnull_content && !inputs.messages.empty()) {
LOG_DBG("Patching null content to empty string (template requires non-null content)\n");
json patched_messages = inputs.messages;
for (auto & msg : patched_messages) {
if (msg.contains("content") && msg["content"].is_null()) {
msg["content"] = "";
}
}
messages_override = patched_messages;
}

if (inputs.messages.empty()) {
// Some templates don't handle empty messages well - always leave something in
json message = {
{ { "role", "user" }, { "content", "Hello" } }
};
messages_override.emplace(message);
}

// Calculate prompt first to detect forced thinking
data.prompt = common_chat_template_direct_apply(tmpl, inputs, messages_override);

// Determine if thinking is forced open based on prompt ending
bool thinking_forced_open = false;
if (analysis.content.reasoning_mode == content_structure::REASONING_FORCED_OPEN) {
if (inputs.enable_thinking) {
thinking_forced_open = true;
LOG_DBG("Thinking forced open based on template analysis\n");
} else {
// Template ends with reasoning start marker but thinking is disabled
// Append the end marker to close it
data.prompt += analysis.content.reasoning_end;
LOG_DBG("Appended reasoning end marker since thinking is disabled\n");
}
}
data.thinking_forced_open = thinking_forced_open;

// Build the unified parser
auto arena = build_parser(analysis, tmpl, inputs, thinking_forced_open);
data.parser = arena.save();

// Determine format
bool has_tools =
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;

if (has_tools && analysis.tools.supports_tools) {
// Unified format that handles both JSON and tagged tool calls
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser with tool support (format: PEG_NATIVE)\n");
} else if (analysis.content.reasoning_mode != content_structure::REASONING_NONE) {
// Reasoning markers detected - use PEG parser to handle thinking blocks
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for reasoning handling (format: PEG_NATIVE)\n");
} else if (analysis.content.content_mode != content_structure::CONTENT_PLAIN) {
// Content markers detected - use PEG parser to strip them even without tools
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for content marker stripping (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
// Recipient-based format (e.g., Functionary v3.2): >>>recipient\n{content}
// Need PEG parser to handle recipient delimiter parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for recipient-based format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
// Tag-with-name format (e.g., func_name\n{args} for Functionary)
// Need PEG parser to handle function name parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for tag-with-name format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG) {
// Bracket-tag format (e.g., [TOOL_CALLS]name[CALL_ID]id[ARGS]{...} for Mistral Small 3.2)
// Need PEG parser to handle bracket tag parsing
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for bracket-tag format (format: PEG_NATIVE)\n");
} else if (analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// Prefixed-indexed format (e.g., Kimi-K2)
// Need PEG parser to handle namespace and indexed format
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
LOG_DBG("Generated unified parser for prefixed-indexed format (format: PEG_NATIVE)\n");
} else {
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
LOG_DBG("Generated unified parser without tools or content markers (format: CONTENT_ONLY)\n");
}

// Determine trigger word for lazy grammar
std::string trigger_word;
if (!analysis.tools.tool_section_start.empty() ||
analysis.tools.function_format == tool_call_structure::FUNC_RECIPIENT_BASED) {
trigger_word = analysis.tools.tool_section_start;
} else if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
trigger_word = analysis.tools.function_prefix;
} else if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// For formats with per-call markers, use per_call_start as trigger
trigger_word = analysis.tools.per_call_start;
}

// Build grammar for tool calls
data.grammar_lazy = analysis.tools.supports_tools && has_tools;

// For FUNC_TAG_WITH_NAME with empty prefix (Functionary), disable lazy grammar
// since there's no clear trigger word - constrain from the start
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
analysis.tools.function_prefix.empty()) {
data.grammar_lazy = false;
}

if (data.grammar_lazy) {
if (!trigger_word.empty()) {
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_word });
}
}

// Build grammar
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
if (inputs.tools.is_array()) {
for (const auto & tool : inputs.tools) {
if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
continue;
}
const auto & function = tool.at("function");
if (function.contains("parameters")) {
auto params = function.at("parameters");
builder.resolve_refs(params);
}
}
}
arena.build_grammar(builder, data.grammar_lazy);
});

// Set preserved tokens from analysis
data.preserved_tokens = analysis.preserved_tokens;

LOG_DBG("=== UNIFIED PEG PARSER GENERATION COMPLETED ===\n");

} catch (const std::exception & e) {
LOG_DBG("Unified parser generation failed: %s\n", e.what());
throw;
}

return data;
}

common_peg_arena universal_peg_generator::build_parser(const template_analysis_result & analysis,
const common_chat_template & tmpl,
const struct templates_params & inputs,
bool thinking_forced_open) {
GGML_UNUSED(tmpl);

auto parser = build_chat_peg_unified_parser([&](common_chat_peg_unified_builder & p) {
// Build reasoning block using ContentStructure
auto reasoning = p.build_reasoning_block(analysis.content, inputs.reasoning_format, thinking_forced_open);

// Build content block using ContentStructure
// Note: we don't pass tool_section_start here because content-before-tools handling
// is done inline in each branch below with p.content(p.until(marker))
auto content = p.build_content_block(analysis.content, inputs.reasoning_format);

// Build tool section using ToolCallStructure (if applicable)
bool has_tools =
inputs.tools.is_array() && !inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;

if (has_tools && analysis.tools.supports_tools) {
bool force_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
auto tool_section =
p.build_tool_section(analysis.tools, inputs.tools, inputs.parallel_tool_calls, force_calls);

// Compose: reasoning -> content before tools -> tool_section -> trailing content
// When thinking is forced open, the reasoning block expects </think>.
// For tool-only messages (no thinking content), the model may output tools directly
// without the </think> tag, so we need to make reasoning optional in that case.
// But if reasoning_format is NONE, the reasoning block is already eps() - don't wrap it
// in optional() as that would generate invalid grammar.
auto reasoning_for_tools =
(thinking_forced_open && inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE) ?
p.optional(reasoning) :
reasoning;

if (!analysis.tools.tool_section_start.empty()) {
// With section markers: look for start marker to delimit content
auto content_before_tools = p.content(p.until(analysis.tools.tool_section_start));
return p.sequence({ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section,
p.space(), p.optional(p.content(p.rest())), p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME &&
!analysis.tools.function_prefix.empty()) {
// Tag-with-name format (e.g., >>>func_name): content stops at function prefix
auto content_before_tools = p.content(p.until(analysis.tools.function_prefix));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_TAG_WITH_NAME) {
// Functionary-style format: tool call starts immediately (e.g., func_name\n{args})
// No content before tools in this format - the entire output is the tool call
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_BRACKET_TAG ||
analysis.tools.function_format == tool_call_structure::FUNC_PREFIXED_INDEXED) {
// Bracket-tag (Mistral Small 3.2) or prefixed-indexed (Kimi-K2) format:
// Tool calls start with per_call_start marker (e.g., [TOOL_CALLS], <|tool_call_begin|>)
if (!analysis.tools.per_call_start.empty()) {
auto content_before_tools = p.content(p.until(analysis.tools.per_call_start));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
// Fallback: no content before tools
return p.sequence({ reasoning_for_tools, p.space(), tool_section, p.end() });
}
if (analysis.tools.function_format == tool_call_structure::FUNC_MARKDOWN_CODE_BLOCK &&
!analysis.tools.code_block_marker.empty()) {
// Markdown code block format (Cohere Command-R Plus):
// Content stops at the code_block_marker (e.g., "Action:")
auto content_before_tools = p.content(p.until(analysis.tools.code_block_marker));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}
// No section markers (raw JSON format): content must stop at JSON object start
// Tool calls start with "{", so use that as a delimiter
auto content_before_tools = p.content(p.until("{"));
return p.sequence(
{ reasoning_for_tools, p.space(), content_before_tools, p.space(), tool_section, p.end() });
}

// No tools - just reasoning (if any) followed by content
return p.sequence({ reasoning, p.space(), content, p.end() });
});

return parser;
}
Loading