Skip to content

Commit 6201458

Browse files
committed
Fix #286
1 parent 4e305b4 commit 6201458

File tree

3 files changed

+75
-21
lines changed

3 files changed

+75
-21
lines changed

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,13 @@ START <- 'This month is ' MONTH '.'
347347
MONTH <- 'Jan' | 'January' | 'Feb' | 'February' | '...'
348348
```
349349

350+
It also supports a case-insensitive mode: append `i` to each literal.
351+
352+
```peg
353+
START <- 'This month is ' MONTH '.'
354+
MONTH <- 'Jan'i | 'January'i | 'Feb'i | 'February'i | '...'i
355+
```
356+
350357
Cut operator
351358
------------
352359

peglib.h

+46-21
Original file line numberDiff line numberDiff line change
@@ -377,14 +377,13 @@ template <typename T> T token_to_number_(std::string_view sv) {
377377

378378
class Trie {
379379
public:
380-
Trie() = default;
381-
Trie(const Trie &) = default;
382-
383-
Trie(const std::vector<std::string> &items) {
380+
Trie(const std::vector<std::string> &items, bool ignore_case)
381+
: ignore_case_(ignore_case) {
384382
for (const auto &item : items) {
385383
for (size_t len = 1; len <= item.size(); len++) {
386384
auto last = len == item.size();
387-
std::string_view sv(item.data(), len);
385+
const auto &s = ignore_case ? to_lower(item) : item;
386+
std::string_view sv(s.data(), len);
388387
auto it = dic_.find(sv);
389388
if (it == dic_.end()) {
390389
dic_.emplace(sv, Info{last, last});
@@ -402,7 +401,8 @@ class Trie {
402401
auto done = false;
403402
size_t len = 1;
404403
while (!done && len <= text_len) {
405-
std::string_view sv(text, len);
404+
const auto &s = ignore_case_ ? to_lower(text) : std::string(text);
405+
std::string_view sv(s.data(), len);
406406
auto it = dic_.find(sv);
407407
if (it == dic_.end()) {
408408
done = true;
@@ -416,6 +416,13 @@ class Trie {
416416
}
417417

418418
private:
419+
// Returns a copy of `s` in which every byte has been passed through
// std::tolower (behavior therefore follows the current C locale).
//
// The helper does not read any instance state, so it is declared static;
// unqualified calls from other members keep working unchanged.
static std::string to_lower(std::string s) {
  for (char &c : s) {
    // std::tolower takes an int that must be representable as unsigned char
    // (or EOF). Plain `char` may be signed, so a byte >= 0x80 (e.g. part of a
    // UTF-8 sequence) would be passed as a negative value, which is undefined
    // behavior. Go through unsigned char first, then narrow back explicitly.
    c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  }
  return s;
}
425+
419426
struct Info {
420427
bool done;
421428
bool match;
@@ -424,6 +431,8 @@ class Trie {
424431
// TODO: Use unordered_map when heterogeneous lookup is supported in C++20
425432
// std::unordered_map<std::string, Info> dic_;
426433
std::map<std::string, Info, std::less<>> dic_;
434+
435+
bool ignore_case_;
427436
};
428437

429438
/*-----------------------------------------------------------------------------
@@ -1159,7 +1168,8 @@ class NotPredicate : public Ope {
11591168

11601169
class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
11611170
public:
1162-
Dictionary(const std::vector<std::string> &v) : trie_(v) {}
1171+
Dictionary(const std::vector<std::string> &v, bool ignore_case)
1172+
: trie_(v, ignore_case) {}
11631173

11641174
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
11651175
std::any &dt) const override;
@@ -1568,8 +1578,9 @@ inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
15681578
return std::make_shared<NotPredicate>(ope);
15691579
}
15701580

1571-
inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v) {
1572-
return std::make_shared<Dictionary>(v);
1581+
inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v,
1582+
bool ignore_case) {
1583+
return std::make_shared<Dictionary>(v, ignore_case);
15731584
}
15741585

15751586
inline std::shared_ptr<Ope> lit(std::string &&s) {
@@ -3335,16 +3346,17 @@ class ParserGenerator {
33353346
seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
33363347
g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
33373348
g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
3338-
g["Primary"] <=
3339-
cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
3340-
npd(g["LEFTARROW"])),
3341-
seq(g["Ignore"], g["Identifier"],
3342-
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
3343-
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
3344-
seq(g["BeginTok"], g["Expression"], g["EndTok"]), g["CapScope"],
3345-
seq(g["BeginCap"], g["Expression"], g["EndCap"]), g["BackRef"],
3346-
g["LiteralI"], g["Dictionary"], g["Literal"], g["NegatedClassI"],
3347-
g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
3349+
g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
3350+
npd(g["LEFTARROW"])),
3351+
seq(g["Ignore"], g["Identifier"],
3352+
npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
3353+
seq(g["OPEN"], g["Expression"], g["CLOSE"]),
3354+
seq(g["BeginTok"], g["Expression"], g["EndTok"]),
3355+
g["CapScope"],
3356+
seq(g["BeginCap"], g["Expression"], g["EndCap"]),
3357+
g["BackRef"], g["DictionaryI"], g["LiteralI"],
3358+
g["Dictionary"], g["Literal"], g["NegatedClassI"],
3359+
g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
33483360

33493361
g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
33503362
g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
@@ -3358,18 +3370,23 @@ class ParserGenerator {
33583370

33593371
g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
33603372

3373+
g["DictionaryI"] <=
3374+
seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"])));
3375+
33613376
auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
33623377
cls("'"), g["Spacing"]),
33633378
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
33643379
cls("\""), g["Spacing"]));
33653380
g["Literal"] <= lit_ope;
33663381
g["LiteralD"] <= lit_ope;
33673382

3368-
g["LiteralI"] <=
3383+
auto lit_case_ignore_ope =
33693384
cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
33703385
g["Spacing"]),
33713386
seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
33723387
g["Spacing"]));
3388+
g["LiteralI"] <= lit_case_ignore_ope;
3389+
g["LiteralID"] <= lit_case_ignore_ope;
33733390

33743391
// NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
33753392
g["Class"] <= seq(chr('['), npd(chr('^')),
@@ -3720,7 +3737,11 @@ class ParserGenerator {
37203737

37213738
g["Dictionary"] = [](const SemanticValues &vs) {
37223739
auto items = vs.transform<std::string>();
3723-
return dic(items);
3740+
return dic(items, false);
3741+
};
3742+
g["DictionaryI"] = [](const SemanticValues &vs) {
3743+
auto items = vs.transform<std::string>();
3744+
return dic(items, true);
37243745
};
37253746

37263747
g["Literal"] = [](const SemanticValues &vs) {
@@ -3735,6 +3756,10 @@ class ParserGenerator {
37353756
auto &tok = vs.tokens.front();
37363757
return resolve_escape_sequence(tok.data(), tok.size());
37373758
};
3759+
g["LiteralID"] = [](const SemanticValues &vs) {
3760+
auto &tok = vs.tokens.front();
3761+
return resolve_escape_sequence(tok.data(), tok.size());
3762+
};
37383763

37393764
g["Class"] = [](const SemanticValues &vs) {
37403765
auto ranges = vs.transform<std::pair<char32_t, char32_t>>();

test/test1.cc

+22
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,28 @@ TEST(GeneralTest, Word_expression_test_Dictionary) {
374374
EXPECT_TRUE(parser.parse("toa"));
375375
}
376376

377+
// Case-insensitive variant of the dictionary word-expression test: every
// literal in the Keyword alternation carries the `i` suffix, and the input
// "toa" is expected to parse successfully.
// NOTE(review): presumably "toa" is accepted because `%word` stops the
// keyword 'to' from matching inside a longer word — confirm against the
// %word semantics.
TEST(GeneralTest, Word_expression_case_ignore_test_Dictionary) {
378+
parser parser(R"(
379+
Identifier ← < !Keyword [a-z][a-z]* >
380+
Keyword ← 'def'i | 'to'i
381+
%whitespace ← [ \t\r\n]*
382+
%word ← [a-z]+
383+
)");
384+
385+
EXPECT_TRUE(parser.parse("toa"));
386+
}
387+
388+
// The Keyword alternation mixes a case-sensitive literal ('def') with a
// case-insensitive one ('to'i). The test expects this grammar to be
// rejected: the parser object must evaluate to false after construction.
TEST(GeneralTest, Word_expression_syntax_error_test_Dictionary) {
389+
parser parser(R"(
390+
Identifier ← < !Keyword [a-z][a-z]* >
391+
Keyword ← 'def' | 'to'i
392+
%whitespace ← [ \t\r\n]*
393+
%word ← [a-z]+
394+
)");
395+
396+
EXPECT_FALSE(parser);
397+
}
398+
377399
TEST(GeneralTest, Skip_token_test) {
378400
parser parser(" ROOT <- _ ITEM (',' _ ITEM _)* "
379401
" ITEM <- ([a-z0-9])+ "

0 commit comments

Comments
 (0)