Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cpp/grammar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ Grammar Grammar::FromJSONSchema(
bool any_whitespace,
std::optional<int> indent,
std::optional<std::pair<std::string, std::string>> separators,
bool strict_mode
bool strict_mode,
bool print_converted_ebnf
) {
auto ebnf_string = JSONSchemaToEBNF(schema, any_whitespace, indent, separators, strict_mode);
if (print_converted_ebnf) {
XGRAMMAR_LOG(INFO) << "Converted EBNF: " << ebnf_string << std::endl;
}
return FromEBNF(ebnf_string);
}

Expand Down
5 changes: 5 additions & 0 deletions cpp/json_schema_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,11 @@ std::string JSONSchemaConverter::CreateRuleFromSchema(
) {
std::string idx = GetSchemaCacheIndex(schema);
if (basic_rules_cache_.count(idx)) {
if (rule_name_hint == kRootRuleName) {
// If the rule name is root, we need to define the root rule instead of just using the
// cached rule.
return ebnf_script_creator_.AddRule(rule_name_hint, basic_rules_cache_[idx]);
}
Copy link
Contributor

@wangkuiyi wangkuiyi Mar 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this change the converter's behavior so that it tends to generate a new rule instead of reusing the cached one? @Ubospica

return basic_rules_cache_[idx];
}

Expand Down
3 changes: 2 additions & 1 deletion cpp/nanobind/nanobind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ NB_MODULE(xgrammar_bindings, m) {
nb::arg("any_whitespace"),
nb::arg("indent").none(),
nb::arg("separators").none(),
nb::arg("strict_mode")
nb::arg("strict_mode"),
nb::arg("print_converted_ebnf")
)
.def_static("from_regex", &Grammar::FromRegex)
.def_static("from_structural_tag", &Grammar_FromStructuralTag)
Expand Down
3 changes: 2 additions & 1 deletion include/xgrammar/grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ class Grammar {
bool any_whitespace = true,
std::optional<int> indent = std::nullopt,
std::optional<std::pair<std::string, std::string>> separators = std::nullopt,
bool strict_mode = true
bool strict_mode = true,
bool print_converted_ebnf = false
);

/*!
Expand Down
6 changes: 5 additions & 1 deletion python/xgrammar/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def from_json_schema(
indent: Optional[int] = None,
separators: Optional[Tuple[str, str]] = None,
strict_mode: bool = True,
print_converted_ebnf: bool = False,
) -> "Grammar":
"""Construct a grammar from JSON schema. Pydantic model or JSON schema string can be
used to specify the schema.
Expand Down Expand Up @@ -159,6 +160,9 @@ def from_json_schema(
This helps LLM to generate accurate output in the grammar-guided generation with JSON
schema.

print_converted_ebnf : bool, default: False
If True, the converted EBNF string will be printed. For debugging purposes.

Returns
-------
grammar : Grammar
Expand All @@ -172,7 +176,7 @@ def from_json_schema(
schema_str = _convert_schema_to_str(schema)
return Grammar._create_from_handle(
_core.Grammar.from_json_schema(
schema_str, any_whitespace, indent, separators, strict_mode
schema_str, any_whitespace, indent, separators, strict_mode, print_converted_ebnf
)
)

Expand Down
48 changes: 48 additions & 0 deletions tests/python/test_json_schema_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,5 +1100,53 @@ def test_empty_object():
check_schema_with_instance(schema, instance_accepted_2, any_whitespace=True)


def test_primitive_type_string():
    """Exercise the converter on a schema that is only ``{"type": "string"}``.

    The schema is first checked against the expected EBNF text (whose ``root`` rule is
    ``basic_string``), then against one instance expected to be accepted and one expected
    to be rejected. All checks run with ``any_whitespace=True``.
    """
    expected_ebnf = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
basic_integer ::= ("0" | "-"? [1-9] [0-9]*)
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
basic_string ::= ["] basic_string_sub
basic_boolean ::= "true" | "false"
basic_null ::= "null"
basic_array ::= ("[" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_any)* [ \n\t]* "]") | "[" [ \n\t]* "]"
basic_object ::= ("{" [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any)* [ \n\t]* "}") | "{" [ \n\t]* "}"
root ::= basic_string
"""
    string_schema = {"type": "string"}

    # Grammar-level check: schema versus the expected EBNF text above.
    check_schema_with_grammar(string_schema, expected_ebnf, any_whitespace=True)

    # Instance-level checks: a quoted JSON string should pass, a bare number should not.
    check_schema_with_instance(string_schema, '"test"', any_whitespace=True)
    check_schema_with_instance(string_schema, "123", is_accepted=False, any_whitespace=True)


def test_primitive_type_object():
    """Exercise the converter on a schema that is only ``{"type": "object"}``.

    The schema is first checked against the expected EBNF text (whose ``root`` rule is
    ``basic_object``), then against one instance expected to be accepted and one expected
    to be rejected. All checks run with ``any_whitespace=True``.
    """
    expected_ebnf = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:])
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
basic_integer ::= ("0" | "-"? [1-9] [0-9]*)
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
basic_string ::= ["] basic_string_sub
basic_boolean ::= "true" | "false"
basic_null ::= "null"
basic_array ::= ("[" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_any)* [ \n\t]* "]") | "[" [ \n\t]* "]"
basic_object ::= ("{" [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any)* [ \n\t]* "}") | "{" [ \n\t]* "}"
root ::= basic_object
"""
    object_schema = {"type": "object"}

    # Grammar-level check: schema versus the expected EBNF text above.
    check_schema_with_grammar(object_schema, expected_ebnf, any_whitespace=True)

    # Instance-level checks: a JSON object should pass, a bare JSON string should not.
    check_schema_with_instance(object_schema, '{"name": "test"}', any_whitespace=True)
    check_schema_with_instance(object_schema, '"test"', is_accepted=False, any_whitespace=True)


if __name__ == "__main__":
    # Executed as a script: hand this process's argv (script path included) to pytest.
    pytest.main(sys.argv)
Loading