From 6d8df47b2d12f4af3929053a94714e59a0d73b81 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Thu, 20 Mar 2025 06:57:32 +0000 Subject: [PATCH 1/2] finish --- cpp/grammar.cc | 6 ++- cpp/json_schema_converter.cc | 5 +++ cpp/nanobind/nanobind.cc | 3 +- include/xgrammar/grammar.h | 3 +- python/xgrammar/grammar.py | 6 ++- tests/python/test_json_schema_converter.py | 49 ++++++++++++++++++++++ 6 files changed, 68 insertions(+), 4 deletions(-) diff --git a/cpp/grammar.cc b/cpp/grammar.cc index 4fef2155..38818e10 100644 --- a/cpp/grammar.cc +++ b/cpp/grammar.cc @@ -27,9 +27,13 @@ Grammar Grammar::FromJSONSchema( bool any_whitespace, std::optional indent, std::optional> separators, - bool strict_mode + bool strict_mode, + bool print_converted_ebnf ) { auto ebnf_string = JSONSchemaToEBNF(schema, any_whitespace, indent, separators, strict_mode); + if (print_converted_ebnf) { + XGRAMMAR_LOG(INFO) << "Converted EBNF: " << ebnf_string << std::endl; + } return FromEBNF(ebnf_string); } diff --git a/cpp/json_schema_converter.cc b/cpp/json_schema_converter.cc index baff3747..cd1f251c 100644 --- a/cpp/json_schema_converter.cc +++ b/cpp/json_schema_converter.cc @@ -739,6 +739,11 @@ std::string JSONSchemaConverter::CreateRuleFromSchema( ) { std::string idx = GetSchemaCacheIndex(schema); if (basic_rules_cache_.count(idx)) { + if (rule_name_hint == kRootRuleName) { + // If the rule name is root, we need to define the root rule instead of just using the + // cached rule. + return ebnf_script_creator_.AddRule(rule_name_hint, basic_rules_cache_[idx]); + } return basic_rules_cache_[idx]; } diff --git a/cpp/nanobind/nanobind.cc b/cpp/nanobind/nanobind.cc index d81e6282..b34fade4 100644 --- a/cpp/nanobind/nanobind.cc +++ b/cpp/nanobind/nanobind.cc @@ -99,7 +99,8 @@ NB_MODULE(xgrammar_bindings, m) { nb::arg("any_whitespace"), nb::arg("indent").none(), nb::arg("separators").none(), - nb::arg("strict_mode") + nb::arg("strict_mode"), + nb::arg("print_converted_ebnf") ) .def_static("from_regex", &Grammar::FromRegex) .def_static("from_structural_tag", &Grammar_FromStructuralTag) diff --git a/include/xgrammar/grammar.h b/include/xgrammar/grammar.h index cd836b1d..a6bdf262 100644 --- a/include/xgrammar/grammar.h +++ b/include/xgrammar/grammar.h @@ -109,7 +109,8 @@ class Grammar { bool any_whitespace = true, std::optional indent = std::nullopt, std::optional> separators = std::nullopt, - bool strict_mode = true + bool strict_mode = true, + bool print_converted_ebnf = false ); /*! diff --git a/python/xgrammar/grammar.py b/python/xgrammar/grammar.py index c5bc0734..f73d57df 100644 --- a/python/xgrammar/grammar.py +++ b/python/xgrammar/grammar.py @@ -117,6 +117,7 @@ def from_json_schema( indent: Optional[int] = None, separators: Optional[Tuple[str, str]] = None, strict_mode: bool = True, + print_converted_ebnf: bool = False, ) -> "Grammar": """Construct a grammar from JSON schema. Pydantic model or JSON schema string can be used to specify the schema. @@ -159,6 +160,9 @@ def from_json_schema( This helps LLM to generate accurate output in the grammar-guided generation with JSON schema. + print_converted_ebnf : bool, default: False + If True, the converted EBNF string will be printed. For debugging purposes. + Returns ------- grammar : Grammar @@ -172,7 +176,7 @@ def from_json_schema( schema_str = _convert_schema_to_str(schema) return Grammar._create_from_handle( _core.Grammar.from_json_schema( - schema_str, any_whitespace, indent, separators, strict_mode + schema_str, any_whitespace, indent, separators, strict_mode, print_converted_ebnf ) ) diff --git a/tests/python/test_json_schema_converter.py b/tests/python/test_json_schema_converter.py index ed86573a..06c6b7dc 100644 --- a/tests/python/test_json_schema_converter.py +++ b/tests/python/test_json_schema_converter.py @@ -26,6 +26,7 @@ def check_schema_with_grammar( separators=separators, strict_mode=strict_mode, ) + print(json_schema_ebnf) assert json_schema_ebnf == expected_grammar_ebnf @@ -1100,5 +1101,53 @@ def test_empty_object(): check_schema_with_instance(schema, instance_accepted_2, any_whitespace=True) +def test_primitive_type_string(): + schema = {"type": "string"} + ebnf_grammar = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] +basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:]) +basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object +basic_integer ::= ("0" | "-"? [1-9] [0-9]*) +basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)? +basic_string ::= ["] basic_string_sub +basic_boolean ::= "true" | "false" +basic_null ::= "null" +basic_array ::= ("[" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_any)* [ \n\t]* "]") | "[" [ \n\t]* "]" +basic_object ::= ("{" [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any)* [ \n\t]* "}") | "{" [ \n\t]* "}" +root ::= basic_string +""" + + check_schema_with_grammar(schema, ebnf_grammar, any_whitespace=True) + + instance_accepted = '"test"' + instance_rejected = "123" + + check_schema_with_instance(schema, instance_accepted, any_whitespace=True) + check_schema_with_instance(schema, instance_rejected, is_accepted=False, any_whitespace=True) + + +def test_primitive_type_object(): + schema = {"type": "object"} + ebnf_grammar = r"""basic_escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] +basic_string_sub ::= ("\"" | [^"\\\r\n] basic_string_sub | "\\" basic_escape basic_string_sub) (= [ \n\t]* [,}\]:]) +basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object +basic_integer ::= ("0" | "-"? [1-9] [0-9]*) +basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)? +basic_string ::= ["] basic_string_sub +basic_boolean ::= "true" | "false" +basic_null ::= "null" +basic_array ::= ("[" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_any)* [ \n\t]* "]") | "[" [ \n\t]* "]" +basic_object ::= ("{" [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any ([ \n\t]* "," [ \n\t]* basic_string [ \n\t]* ":" [ \n\t]* basic_any)* [ \n\t]* "}") | "{" [ \n\t]* "}" +root ::= basic_object +""" + + check_schema_with_grammar(schema, ebnf_grammar, any_whitespace=True) + + instance_accepted = '{"name": "test"}' + instance_rejected = '"test"' + + check_schema_with_instance(schema, instance_accepted, any_whitespace=True) + check_schema_with_instance(schema, instance_rejected, is_accepted=False, any_whitespace=True) + + if __name__ == "__main__": pytest.main(sys.argv) From 023886ae51d7e1f6e6734e43d442da4394224427 Mon Sep 17 00:00:00 2001 From: Ubospica Date: Thu, 20 Mar 2025 06:58:22 +0000 Subject: [PATCH 2/2] update --- tests/python/test_json_schema_converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/test_json_schema_converter.py b/tests/python/test_json_schema_converter.py index 06c6b7dc..db6c7432 100644 --- a/tests/python/test_json_schema_converter.py +++ b/tests/python/test_json_schema_converter.py @@ -26,7 +26,6 @@ def check_schema_with_grammar( separators=separators, strict_mode=strict_mode, ) - print(json_schema_ebnf) assert json_schema_ebnf == expected_grammar_ebnf