Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 52 additions & 7 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "json-schema-to-grammar.h"
#include "nlohmann/json.hpp"

#include <functional>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for this anymore.

#include <stdexcept>
#include <string>

Expand Down Expand Up @@ -302,8 +303,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
params.at("required").get_to(required);
}

// Build parser for each argument
std::vector<common_peg_parser> arg_parsers;
// Build parser for each argument, separating required and optional
std::vector<common_peg_parser> required_parsers;
std::vector<common_peg_parser> optional_parsers;
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required.find(param_name) != required.end();
std::string type = "object";
Expand All @@ -328,20 +330,63 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
p.space()) +
p.tool_arg_close(p.literal(arguments.value_suffix)));

auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
if (is_required) {
arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg));
required_parsers.push_back(named_arg);
} else {
arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg)));
optional_parsers.push_back(named_arg);
}
}

// Build arg sequence with space() between consecutive args
// Build required arg sequence in definition order
common_peg_parser args_seq = p.eps();
for (size_t i = 0; i < arg_parsers.size(); i++) {
for (size_t i = 0; i < required_parsers.size(); i++) {
if (i > 0) {
args_seq = args_seq + p.space();
}
args_seq = args_seq + arg_parsers[i];
args_seq = args_seq + required_parsers[i];
}

// Build optional args with flexible ordering
if (!optional_parsers.empty()) {
if (optional_parsers.size() <= 4) {
// For up to 4 optional params, generate a recursive choice tree
// that allows any permutation without duplicates.
// Each level: choice(space+opt[i]+recurse(remaining-i) for each i, eps)
std::function<common_peg_parser(std::vector<size_t>)> build_opt_choices;
build_opt_choices = [&](std::vector<size_t> remaining) -> common_peg_parser {
if (remaining.empty()) {
return p.eps();
}
common_peg_parser choices = p.choice();
for (size_t i = 0; i < remaining.size(); i++) {
auto idx = remaining[i];
std::vector<size_t> next;
for (size_t j = 0; j < remaining.size(); j++) {
if (j != i) {
next.push_back(remaining[j]);
}
}
choices |= p.space() + optional_parsers[idx] + build_opt_choices(next);
}
choices |= p.eps();
return choices;
};
std::vector<size_t> all_indices(optional_parsers.size());
for (size_t i = 0; i < optional_parsers.size(); i++) {
all_indices[i] = i;
}
args_seq = args_seq + build_opt_choices(all_indices);
} else {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather just do this for any number of optional arguments. Presumably a model should know not to generate a duplicate argument, but we will accept it since the JSON parser doesn't care.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean just ignore the shuffle and use the "else" branch as the universal solution?

I think the overhead for building the permutations for 4 options is really small (only 24 branches) and it does add a bit of reliability for what I feel is a large number of cases. Unless you think we should just keep it uniform with the JSON parser.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I simply don't see the need for the added complexity of permutations. A repetition of choice is more than good enough, IMO.

@aldehir aldehir Mar 6, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe try it out and see if there are any problems?

This problem also exists for the JSON tool calling parsers that still enforce order because of json-schema-to-grammar. However, we can explode the arguments out like the XML parsers with a repetition that has a leading comma.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, the permutations are likely completely unnecessary.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine, no shuffle.
[image]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Someone's gotta reel in your genius.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You meant "chaos", right? ;)

// For 5+ optional params, use choice-of-any repeated up to N times.
// This may allow duplicate params as a trade-off for avoiding
// combinatorial explosion of permutations.
common_peg_parser any_opt = p.choice();
for (const auto & opt : optional_parsers) {
any_opt |= opt;
}
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size());
}
}

// Build call_id parser based on position (if supported)
Expand Down
87 changes: 87 additions & 0 deletions tests/test-chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,41 @@ static common_chat_tool quoted_unquoted_tool{
};


// Test fixture: a tool whose JSON schema declares six properties, of which
// only "req1" (string) and "req2" (integer) are listed under "required".
// The remaining four ("opt1".."opt4", alternating string/integer) are
// therefore optional. Used by the flexible optional-argument ordering test,
// which supplies a subset of the optional parameters out of declaration order.
static common_chat_tool tool_2req_4opt{
/* .name = */ "tool_2req_4opt",
/* .description = */ "Tool with 2 required and 4 optional params",
/* .parameters = */ R"({
"type": "object",
"properties": {
"req1": { "type": "string", "description": "Required string" },
"req2": { "type": "integer", "description": "Required int" },
"opt1": { "type": "string", "description": "Optional string 1" },
"opt2": { "type": "integer", "description": "Optional int 1" },
"opt3": { "type": "string", "description": "Optional string 2" },
"opt4": { "type": "integer", "description": "Optional int 2" }
},
"required": ["req1", "req2"]
})",
};

// Test fixture: a tool whose JSON schema declares seven properties, of which
// only "req1" (string) and "req2" (integer) are listed under "required".
// The remaining five ("opt1".."opt5", alternating string/integer) are
// therefore optional. Used by the flexible optional-argument ordering tests,
// both with a subset of the optional parameters and with all five supplied
// in shuffled order.
static common_chat_tool tool_2req_5opt{
/* .name = */ "tool_2req_5opt",
/* .description = */ "Tool with 2 required and 5 optional params",
/* .parameters = */ R"({
"type": "object",
"properties": {
"req1": { "type": "string", "description": "Required string" },
"req2": { "type": "integer", "description": "Required int" },
"opt1": { "type": "string", "description": "Optional string 1" },
"opt2": { "type": "integer", "description": "Optional int 1" },
"opt3": { "type": "string", "description": "Optional string 2" },
"opt4": { "type": "integer", "description": "Optional int 2" },
"opt5": { "type": "string", "description": "Optional string 3" }
},
"required": ["req1", "req2"]
})",
};

static std::vector<common_chat_tool> tools{ special_function_tool, special_function_tool_with_optional_param,
python_tool, html_tool, todo_list };

Expand Down Expand Up @@ -1958,6 +1993,58 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
{ "todo_list", "{\"todos\": [{\"item\": \"Check stuff\", \"selected\": false}, {\"item\": \"Prepare stuff\", \"selected\": true}]}", {} },
})
.run();

// Test flexible optional argument ordering (2 required + 4 optional, reversed optional order)
tst.test(
"<tool_call>\n"
"<function=tool_2req_4opt>\n"
"<parameter=req1>\nhello\n</parameter>\n"
"<parameter=req2>\n42\n</parameter>\n"
"<parameter=opt4>\n100\n</parameter>\n"
"<parameter=opt2>\n200\n</parameter>\n"
"</function>\n"
"</tool_call>")
.tools({ tool_2req_4opt })
.expect_tool_calls({
{ "tool_2req_4opt", R"({"req1": "hello", "req2": 42, "opt4": 100, "opt2": 200})", {} },
})
.run();

// Test flexible optional argument ordering (2 required + 5 optional, reversed optional order)
tst.test(
"<tool_call>\n"
"<function=tool_2req_5opt>\n"
"<parameter=req1>\nworld\n</parameter>\n"
"<parameter=req2>\n7\n</parameter>\n"
"<parameter=opt5>\nlast\n</parameter>\n"
"<parameter=opt3>\nmiddle\n</parameter>\n"
"<parameter=opt1>\nfirst\n</parameter>\n"
"</function>\n"
"</tool_call>")
.tools({ tool_2req_5opt })
.expect_tool_calls({
{ "tool_2req_5opt", R"({"req1": "world", "req2": 7, "opt5": "last", "opt3": "middle", "opt1": "first"})", {} },
})
.run();

// Test flexible optional argument ordering (2 required + 5 optional, all 5 in shuffled order)
tst.test(
"<tool_call>\n"
"<function=tool_2req_5opt>\n"
"<parameter=req1>\ntest\n</parameter>\n"
"<parameter=req2>\n99\n</parameter>\n"
"<parameter=opt3>\nc\n</parameter>\n"
"<parameter=opt1>\na\n</parameter>\n"
"<parameter=opt5>\ne\n</parameter>\n"
"<parameter=opt4>\n4\n</parameter>\n"
"<parameter=opt2>\n2\n</parameter>\n"
"</function>\n"
"</tool_call>")
.tools({ tool_2req_5opt })
.expect_tool_calls({
{ "tool_2req_5opt", R"({"req1": "test", "req2": 99, "opt3": "c", "opt1": "a", "opt5": "e", "opt4": 4, "opt2": 2})", {} },
})
.run();
}
{
auto tst = peg_tester("models/templates/deepseek-ai-DeepSeek-V3.1.jinja", detailed_debug);
Expand Down
Loading