diff --git a/README.md b/README.md index 5d28cc0..9074298 100644 --- a/README.md +++ b/README.md @@ -647,6 +647,25 @@ custom_message.txt:1:8: code format error... NOTE: If there are more than one elements with error message instruction in a prioritized choice, this feature may not work as you expect. +Change the Start Definition Rule +-------------------------------- + +We can change the start definition rule as below. + +```cpp +peg::parser parser( + R"( + Start <- A + A <- B (',' B)* + B <- '[one]' / '[two]' + %whitespace <- [ \t\n]* + )", + "A" // Start Rule is "A" +); + +parser.parse(" [one] , [two] "); // OK +``` + peglint - PEG syntax lint utility --------------------------------- diff --git a/peglib.h b/peglib.h index 8be3bcd..c5d8b94 100644 --- a/peglib.h +++ b/peglib.h @@ -3298,18 +3298,15 @@ using Rules = std::unordered_map>; class ParserGenerator { public: - static std::shared_ptr parse(const char *s, size_t n, - const Rules &rules, std::string &start, - bool &enablePackratParsing, Log log) { - return get_instance().perform_core(s, n, rules, start, enablePackratParsing, - log); - } + struct ParserContext { + std::shared_ptr grammar; + std::string start; + bool enablePackratParsing = false; + }; - static std::shared_ptr parse(const char *s, size_t n, - std::string &start, - bool &enablePackratParsing, Log log) { - Rules dummy; - return parse(s, n, dummy, start, enablePackratParsing, log); + static ParserContext parse(const char *s, size_t n, const Rules &rules, + Log log, std::string_view start) { + return get_instance().perform_core(s, n, rules, log, start); } // For debugging purpose @@ -3989,9 +3986,8 @@ class ParserGenerator { return true; } - std::shared_ptr perform_core(const char *s, size_t n, - const Rules &rules, std::string &start, - bool &enablePackratParsing, Log log) { + ParserContext perform_core(const char *s, size_t n, const Rules &rules, + Log log, std::string_view requested_start) { Data data; auto &grammar = *data.grammar; @@ -4023,7 
+4019,7 @@ class ParserGenerator { log(line.first, line.second, "syntax error", r.error_info.label); } } - return nullptr; + return {}; } // User provided rules @@ -4081,7 +4077,10 @@ class ParserGenerator { } // Set root definition - auto &start_rule = grammar[data.start]; + auto start = data.start; + if (!requested_start.empty()) { start = requested_start; } + + auto &start_rule = grammar[start]; // Check if the start rule has ignore operator { @@ -4096,7 +4095,7 @@ class ParserGenerator { } } - if (!ret) { return nullptr; } + if (!ret) { return {}; } // Check missing definitions auto referenced = std::unordered_set{ @@ -4129,7 +4128,7 @@ class ParserGenerator { } } - if (!ret) { return nullptr; } + if (!ret) { return {}; } // Link references for (auto &x : grammar) { @@ -4153,10 +4152,10 @@ class ParserGenerator { } } - if (!ret) { return nullptr; } + if (!ret) { return {}; } // Check infinite loop - if (detect_infiniteLoop(data, start_rule, log, s)) { return nullptr; } + if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; } // Automatic whitespace skipping if (grammar.count(WHITESPACE_DEFINITION_NAME)) { @@ -4169,7 +4168,7 @@ class ParserGenerator { auto &rule = grammar[WHITESPACE_DEFINITION_NAME]; start_rule.whitespaceOpe = wsp(rule.get_core_operator()); - if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } + if (detect_infiniteLoop(data, rule, log, s)) { return {}; } } // Word expression @@ -4177,7 +4176,7 @@ class ParserGenerator { auto &rule = grammar[WORD_DEFINITION_NAME]; start_rule.wordOpe = rule.get_core_operator(); - if (detect_infiniteLoop(data, rule, log, s)) { return nullptr; } + if (detect_infiniteLoop(data, rule, log, s)) { return {}; } } // Apply instructions @@ -4189,9 +4188,7 @@ class ParserGenerator { const auto &info = std::any_cast(instruction.data); - if (!apply_precedence_instruction(rule, info, s, log)) { - return nullptr; - } + if (!apply_precedence_instruction(rule, info, s, log)) { return {}; } } else if 
(instruction.type == "error_message") { rule.error_message = std::any_cast(instruction.data); } else if (instruction.type == "no_ast_opt") { @@ -4200,11 +4197,7 @@ class ParserGenerator { } } - // Set root definition - start = data.start; - enablePackratParsing = data.enablePackratParsing; - - return data.grammar; + return {data.grammar, start, data.enablePackratParsing}; } bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, @@ -4530,43 +4523,52 @@ class parser { public: parser() = default; - parser(const char *s, size_t n, const Rules &rules) { - load_grammar(s, n, rules); + parser(const char *s, size_t n, const Rules &rules, + std::string_view start = {}) { + load_grammar(s, n, rules, start); } - parser(const char *s, size_t n) : parser(s, n, Rules()) {} + parser(const char *s, size_t n, std::string_view start = {}) + : parser(s, n, Rules(), start) {} - parser(std::string_view sv, const Rules &rules) - : parser(sv.data(), sv.size(), rules) {} + parser(std::string_view sv, const Rules &rules, std::string_view start = {}) + : parser(sv.data(), sv.size(), rules, start) {} - parser(std::string_view sv) : parser(sv.data(), sv.size(), Rules()) {} + parser(std::string_view sv, std::string_view start = {}) + : parser(sv.data(), sv.size(), Rules(), start) {} #if defined(__cpp_lib_char8_t) - parser(std::u8string_view sv, const Rules &rules) - : parser(reinterpret_cast(sv.data()), sv.size(), rules) {} + parser(std::u8string_view sv, const Rules &rules, std::string_view start = {}) + : parser(reinterpret_cast(sv.data()), sv.size(), rules, + start) {} - parser(std::u8string_view sv) - : parser(reinterpret_cast(sv.data()), sv.size(), Rules()) {} + parser(std::u8string_view sv, std::string_view start = {}) + : parser(reinterpret_cast(sv.data()), sv.size(), Rules(), + start) {} #endif operator bool() { return grammar_ != nullptr; } - bool load_grammar(const char *s, size_t n, const Rules &rules) { - grammar_ = ParserGenerator::parse(s, n, rules, start_, 
- enablePackratParsing_, log_); + bool load_grammar(const char *s, size_t n, const Rules &rules, + std::string_view start = {}) { + auto cxt = ParserGenerator::parse(s, n, rules, log_, start); + grammar_ = cxt.grammar; + start_ = cxt.start; + enablePackratParsing_ = cxt.enablePackratParsing; return grammar_ != nullptr; } - bool load_grammar(const char *s, size_t n) { - return load_grammar(s, n, Rules()); + bool load_grammar(const char *s, size_t n, std::string_view start = {}) { + return load_grammar(s, n, Rules(), start); } - bool load_grammar(std::string_view sv, const Rules &rules) { - return load_grammar(sv.data(), sv.size(), rules); + bool load_grammar(std::string_view sv, const Rules &rules, + std::string_view start = {}) { + return load_grammar(sv.data(), sv.size(), rules, start); } - bool load_grammar(std::string_view sv) { - return load_grammar(sv.data(), sv.size()); + bool load_grammar(std::string_view sv, std::string_view start = {}) { + return load_grammar(sv.data(), sv.size(), start); } bool parse_n(const char *s, size_t n, const char *path = nullptr) const { @@ -4671,7 +4673,7 @@ class parser { void enable_packrat_parsing() { if (grammar_ != nullptr) { auto &rule = (*grammar_)[start_]; - rule.enablePackratParsing = enablePackratParsing_ && true; + rule.enablePackratParsing = enablePackratParsing_; } } diff --git a/test/test1.cc b/test/test1.cc index f63ad96..9b11252 100644 --- a/test/test1.cc +++ b/test/test1.cc @@ -423,7 +423,9 @@ TEST(GeneralTest, Skip_token_test2) { } TEST(GeneralTest, Custom_AST_test) { - struct CustomType { bool dummy = false; }; + struct CustomType { + bool dummy = false; + }; using CustomAst = AstBase; parser parser(R"( @@ -646,11 +648,8 @@ TEST(GeneralTest, Calculator_test2) { NUMBER <- [0-9]+ )"; - std::string start; - bool enablePackratParsing = false; - auto grammar = ParserGenerator::parse(syntax, strlen(syntax), start, - enablePackratParsing, nullptr); - auto &g = *grammar; + auto cxt = ParserGenerator::parse(syntax, 
strlen(syntax), {}, nullptr, {}); + auto &g = *cxt.grammar; // Setup actions auto reduce = [](const SemanticValues &vs) -> long { @@ -679,7 +678,7 @@ TEST(GeneralTest, Calculator_test2) { // Parse long val; - auto r = g[start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); + auto r = g[cxt.start].parse_and_get_value("1+2*3*(4-5+6)/7-8", val); EXPECT_TRUE(r.ret); EXPECT_EQ(-3, val); @@ -1285,3 +1284,35 @@ TEST(GeneralTest, PassingContextAndOutputParameter) { parser.parse("42", dt, output); EXPECT_EQ(42, output); } + +TEST(GeneralTest, SpecifyStartRule) { + auto grammar = R"( + Start <- A + A <- B (',' B)* + B <- '[one]' / '[two]' + %whitespace <- [ \t\n]* + )"; + + { + parser peg(grammar, "A"); + EXPECT_TRUE(peg.parse(" [one] , [two] ")); + } + + { + parser peg(grammar); + EXPECT_TRUE(peg.parse(" [one] , [two] ")); + + peg.load_grammar(grammar, "A"); + EXPECT_TRUE(peg.parse(" [one] , [two] ")); + } + + { + parser peg; + + peg.load_grammar(grammar); + EXPECT_TRUE(peg.parse(" [one] , [two] ")); + + peg.load_grammar(grammar, "A"); + EXPECT_TRUE(peg.parse(" [one] , [two] ")); + } +}