-
Notifications
You must be signed in to change notification settings - Fork 19.9k
Autoparser: add optional argument reshuffle capability #20171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ | |
| #include "json-schema-to-grammar.h" | ||
| #include "nlohmann/json.hpp" | ||
|
|
||
| #include <functional> | ||
| #include <stdexcept> | ||
| #include <string> | ||
|
|
||
|
|
@@ -302,8 +303,9 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte | |
| params.at("required").get_to(required); | ||
| } | ||
|
|
||
| // Build parser for each argument | ||
| std::vector<common_peg_parser> arg_parsers; | ||
| // Build parser for each argument, separating required and optional | ||
| std::vector<common_peg_parser> required_parsers; | ||
| std::vector<common_peg_parser> optional_parsers; | ||
| for (const auto & [param_name, param_schema] : properties.items()) { | ||
| bool is_required = required.find(param_name) != required.end(); | ||
| std::string type = "object"; | ||
|
|
@@ -328,20 +330,63 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte | |
| p.space()) + | ||
| p.tool_arg_close(p.literal(arguments.value_suffix))); | ||
|
|
||
| auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); | ||
| if (is_required) { | ||
| arg_parsers.push_back(p.rule("tool-" + name + "-arg-" + param_name, arg)); | ||
| required_parsers.push_back(named_arg); | ||
| } else { | ||
| arg_parsers.push_back(p.optional(p.rule("tool-" + name + "-arg-" + param_name, arg))); | ||
| optional_parsers.push_back(named_arg); | ||
| } | ||
| } | ||
|
|
||
| // Build arg sequence with space() between consecutive args | ||
| // Build required arg sequence in definition order | ||
| common_peg_parser args_seq = p.eps(); | ||
| for (size_t i = 0; i < arg_parsers.size(); i++) { | ||
| for (size_t i = 0; i < required_parsers.size(); i++) { | ||
| if (i > 0) { | ||
| args_seq = args_seq + p.space(); | ||
| } | ||
| args_seq = args_seq + arg_parsers[i]; | ||
| args_seq = args_seq + required_parsers[i]; | ||
| } | ||
|
|
||
| // Build optional args with flexible ordering | ||
| if (!optional_parsers.empty()) { | ||
| if (optional_parsers.size() <= 4) { | ||
| // For up to 4 optional params, generate a recursive choice tree | ||
| // that allows any permutation without duplicates. | ||
| // Each level: choice(space+opt[i]+recurse(remaining-i) for each i, eps) | ||
| std::function<common_peg_parser(std::vector<size_t>)> build_opt_choices; | ||
| build_opt_choices = [&](std::vector<size_t> remaining) -> common_peg_parser { | ||
| if (remaining.empty()) { | ||
| return p.eps(); | ||
| } | ||
| common_peg_parser choices = p.choice(); | ||
| for (size_t i = 0; i < remaining.size(); i++) { | ||
| auto idx = remaining[i]; | ||
| std::vector<size_t> next; | ||
| for (size_t j = 0; j < remaining.size(); j++) { | ||
| if (j != i) { | ||
| next.push_back(remaining[j]); | ||
| } | ||
| } | ||
| choices |= p.space() + optional_parsers[idx] + build_opt_choices(next); | ||
| } | ||
| choices |= p.eps(); | ||
| return choices; | ||
| }; | ||
| std::vector<size_t> all_indices(optional_parsers.size()); | ||
| for (size_t i = 0; i < optional_parsers.size(); i++) { | ||
| all_indices[i] = i; | ||
| } | ||
| args_seq = args_seq + build_opt_choices(all_indices); | ||
| } else { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd rather just do this for any number of optional arguments. Presumably a model should know not to generate a duplicate argument, but we will accept it since the JSON parser doesn't care.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean just ignore the shuffle and use the "else" branch as the universal solution? I think the overhead for building the permutations for 4 options is really small (only 24 branches) and it does add a bit of reliability for what I feel is a large number of cases. Unless you think we should just keep it uniform with the JSON parser.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. I simply don't see the need for the added complexity of permutations. A repetition of choice is more than good enough, IMO.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe try it out and see if there are any problems? This problem also exists for the JSON tool calling parsers that still enforce order because of
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, the permutations are likely completely unnecessary.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Someone's gotta reel in your genius.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You meant "chaos", right? ;) |
||
| // For 5+ optional params, use choice-of-any repeated up to N times. | ||
| // This may allow duplicate params as a trade-off for avoiding | ||
| // combinatorial explosion of permutations. | ||
| common_peg_parser any_opt = p.choice(); | ||
| for (const auto & opt : optional_parsers) { | ||
| any_opt |= opt; | ||
| } | ||
| args_seq = args_seq + p.repeat(p.space() + any_opt, 0, (int) optional_parsers.size()); | ||
| } | ||
| } | ||
|
|
||
| // Build call_id parser based on position (if supported) | ||
|
|
||

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for this anymore.