GH-5362: `<regex>`: Properly parse dollar anchors in basic and grep mode

stl/inc/regex:
  * _Parser::_Is_esc() now receives the iterator to inspect instead of always starting from _Pat;
    the existing callers in _Trans() and _Next() pass _Pat.
  * _Parser::_Trans(), case _Meta_dlr: when _L_anch_rstr is set and '$' is not the last pattern
    character, '$' is mapped to _Meta_chr unless it is followed by a newline while _L_alt_nl is set
    and _Disj_count == 0, or by an escaped ')' while _Disj_count != 0.

tests/std/tests/VSO_0000000_regex_use/test.cpp:
  * Removes a duplicated should_search_fail("ca") assertion from test_gh_5165_grep().
  * Adds test_gh_5362() and its helpers, and calls it from main().

diff --git a/stl/inc/regex b/stl/inc/regex
index 17fd2ee72c5..3e9d01bee30 100644
--- a/stl/inc/regex
+++ b/stl/inc/regex
@@ -1721,7 +1721,7 @@ private:
     // lexing
     [[noreturn]] void _Error(regex_constants::error_type);
 
-    bool _Is_esc() const;
+    bool _Is_esc(_FwdIt) const;
     void _Trans();
     void _Next();
     void _Expect(_Meta_type, regex_constants::error_type);
@@ -3875,8 +3875,7 @@ template <class _FwdIt, class _Elem, class _RxTraits>
 }
 
 template <class _FwdIt, class _Elem, class _RxTraits>
-bool _Parser<_FwdIt, _Elem, _RxTraits>::_Is_esc() const { // assumes _Pat != _End
-    _FwdIt _Ch0 = _Pat;
+bool _Parser<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End
     return ++_Ch0 != _End
         && ((!(_L_flags & _L_nex_grp) && (*_Ch0 == _Meta_lpar || *_Ch0 == _Meta_rpar))
             || (!(_L_flags & _L_nex_rep) && (*_Ch0 == _Meta_lbr || *_Ch0 == _Meta_rbr)));
@@ -3897,7 +3896,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
     }
     switch (_Char) { // handle special cases
     case _Meta_esc:
-        if (_Is_esc()) { // replace escape sequence
+        if (_Is_esc(_Pat)) { // replace escape sequence
             _FwdIt _Ch0 = _Pat;
             _Mchar      = static_cast<_Meta_type>(_Char = *++_Ch0);
         }
@@ -3941,9 +3940,28 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
 
     case _Meta_dlr:
         { // check if $ is special
-            _FwdIt _Ch0 = _Pat;
-            if ((_L_flags & _L_anch_rstr) && ++_Ch0 != _End && *_Ch0 != _Meta_nl) {
-                _Mchar = _Meta_chr;
+            _FwdIt _Next = _Pat;
+            if ((_L_flags & _L_anch_rstr) && ++_Next != _End) {
+                const bool _Escaped = *_Next == _Meta_esc && _Is_esc(_Next);
+                if (_Escaped) {
+                    ++_Next;
+                }
+
+                // Only the basic and grep grammars set _L_anch_rstr, so _L_alt_pipe and _L_nex_grp must be unset.
+                // Therefore, we don't need to handle "dollar followed by pipe '|' for alternation"
+                // or "dollar followed by non-escaped right parenthesis ')' closing a group" below.
+                _STL_INTERNAL_CHECK((_L_flags & (_L_alt_pipe | _L_nex_grp)) == 0);
+
+                const _Elem _Ch = *_Next;
+                const bool _Is_end_of_alternative =
+                    ((_L_flags & _L_alt_nl) && _Ch == _Meta_nl
+                        && _Disj_count == 0) // dollar followed by newline '\n' for alternation
+                    || (_Escaped && _Ch == _Meta_rpar
+                        && _Disj_count != 0); // dollar followed by (escaped) right parenthesis ')' closing a group
+
+                if (!_Is_end_of_alternative) {
+                    _Mchar = _Meta_chr;
+                }
             }
 
             break;
@@ -3972,7 +3990,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
 template <class _FwdIt, class _Elem, class _RxTraits>
 void _Parser<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character
     if (_Pat != _End) { // advance
-        if (*_Pat == _Meta_esc && _Is_esc()) {
+        if (*_Pat == _Meta_esc && _Is_esc(_Pat)) {
             ++_Pat;
         }
 
diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp
index ba9c575940a..0c2ad16b343 100644
--- a/tests/std/tests/VSO_0000000_regex_use/test.cpp
+++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp
@@ -1031,7 +1031,6 @@ void test_gh_5165_grep() {
         middle_nl_with_caret.should_search_fail("^a");
         middle_nl_with_caret.should_search_fail("ca");
         middle_nl_with_caret.should_search_fail("^b");
-        middle_nl_with_caret.should_search_fail("ca");
         middle_nl_with_caret.should_search_fail("cb");
     }
     {
@@ -1171,6 +1170,196 @@ void test_gh_5253() {
     g_regexTester.should_not_match("a", "()*");
 }
 
+void test_gh_5362_syntax_option(const syntax_option_type basic_or_grep) {
+    {
+        const test_regex ending_anchor(&g_regexTester, "meo[wW]$", basic_or_grep);
+        ending_anchor.should_search_match("kitten_meow", "meow");
+        ending_anchor.should_search_fail("homeowner");
+    }
+    {
+        const test_regex middle_anchor(&g_regexTester, "me$o[wW]", basic_or_grep);
+        middle_anchor.should_search_fail("kitten_meow");
+        middle_anchor.should_search_fail("homeowner");
+        middle_anchor.should_search_match("home$owner", "me$ow");
+    }
+    {
+        const test_regex double_dollars(&g_regexTester, "meo[wW]$$", basic_or_grep);
+        double_dollars.should_search_fail("kitten_meow");
+        double_dollars.should_search_fail("homeowner");
+        double_dollars.should_search_match("kitten_meow$", "meow$");
+        double_dollars.should_search_fail("kitten_meow$$");
+        double_dollars.should_search_fail("homeow$ner");
+        double_dollars.should_search_fail("homeow$$ner");
+    }
+
+    g_regexTester.should_not_match("me$ow", R"(\(me$\)o[wW])", basic_or_grep);
+    g_regexTester.should_not_match("meow", R"(\(me$\)o[wW])", basic_or_grep);
+
+    {
+        const test_regex singlegroup_anchor(&g_regexTester, R"(\(meo[wW]$\))", basic_or_grep);
+        singlegroup_anchor.should_search_match("kitten_meow", "meow");
+        singlegroup_anchor.should_search_fail("kitten_meow$");
+        singlegroup_anchor.should_search_fail("homeowner");
+        singlegroup_anchor.should_search_fail("homeow$ner");
+    }
+    {
+        const test_regex suffixedgroup_anchor(&g_regexTester, R"(\(meo[wW]$\).*)", basic_or_grep);
+        suffixedgroup_anchor.should_search_match("kitten_meow", "meow");
+        suffixedgroup_anchor.should_search_fail("kitten_meow$");
+        suffixedgroup_anchor.should_search_fail("homeowner");
+        suffixedgroup_anchor.should_search_fail("homeow$ner");
+    }
+    {
+        const test_regex firstgroup_anchor(&g_regexTester, R"(\(meo[wW]$\)\(.*\))", basic_or_grep);
+        firstgroup_anchor.should_search_match("kitten_meow", "meow");
+        firstgroup_anchor.should_search_fail("kitten_meow$");
+        firstgroup_anchor.should_search_fail("homeowner");
+        firstgroup_anchor.should_search_fail("homeow$ner");
+    }
+    {
+        const test_regex nested_anchor(&g_regexTester, R"(\(\(meo[wW]$\)$\).*)", basic_or_grep);
+        nested_anchor.should_search_match("kitten_meow", "meow");
+        nested_anchor.should_search_fail("kitten_meow$");
+        nested_anchor.should_search_fail("kitten_meow$$");
+        nested_anchor.should_search_fail("homeowner");
+        nested_anchor.should_search_fail("homeow$ner");
+        nested_anchor.should_search_fail("homeow$$ner");
+    }
+    {
+        const test_regex double_dollars(&g_regexTester, R"(\(meo[wW]$$\).*)", basic_or_grep);
+        double_dollars.should_search_fail("kitten_meow");
+        double_dollars.should_search_match("kitten_meow$", "meow$");
+        double_dollars.should_search_fail("kitten_meow$$");
+        double_dollars.should_search_fail("homeowner");
+        double_dollars.should_search_fail("homeow$ner");
+        double_dollars.should_search_fail("homeow$$ner");
+    }
+
+    // Validate that there is no special behavior near bars,
+    // as they are alternation operators in regex modes other than basic or grep.
+    {
+        const test_regex middle_bar(&g_regexTester, "a|a$", basic_or_grep);
+        middle_bar.should_search_match("a|a", "a|a");
+        middle_bar.should_search_fail("a|a$");
+        middle_bar.should_search_fail("a|ab");
+        middle_bar.should_search_fail("a");
+    }
+    {
+        const test_regex group_middle_bar(&g_regexTester, R"(\(a|a\)$)", basic_or_grep);
+        group_middle_bar.should_search_match("a|a", "a|a");
+        group_middle_bar.should_search_fail("a|a$");
+        group_middle_bar.should_search_fail("a|ab");
+        group_middle_bar.should_search_fail("a");
+    }
+    {
+        const test_regex middle_bar_with_dollar(&g_regexTester, "a$|b$", basic_or_grep);
+        middle_bar_with_dollar.should_search_match("a$|b", "a$|b");
+        middle_bar_with_dollar.should_search_fail("a|b");
+        middle_bar_with_dollar.should_search_fail("a$|b$");
+        middle_bar_with_dollar.should_search_fail("a$|bc");
+        middle_bar_with_dollar.should_search_fail("a");
+        middle_bar_with_dollar.should_search_fail("b");
+    }
+    {
+        const test_regex group_middle_bar_with_dollar(&g_regexTester, R"(\(a$|b\)$)", basic_or_grep);
+        group_middle_bar_with_dollar.should_search_match("a$|b", "a$|b");
+        group_middle_bar_with_dollar.should_search_fail("a|b");
+        group_middle_bar_with_dollar.should_search_fail("a$|b$");
+        group_middle_bar_with_dollar.should_search_fail("a$|bc");
+        group_middle_bar_with_dollar.should_search_fail("a");
+        group_middle_bar_with_dollar.should_search_fail("b");
+    }
+}
+
+void test_gh_5362_basic() {
+    // test cases specific for basic regular expressions
+    {
+        const test_regex middle_nl(&g_regexTester, "a\na$", basic);
+        middle_nl.should_search_match("a\na", "a\na");
+        middle_nl.should_search_fail("a\na$");
+        middle_nl.should_search_fail("a\nab");
+        middle_nl.should_search_fail("a");
+    }
+    {
+        const test_regex group_middle_nl(&g_regexTester, "\\(a\na\\)$", basic);
+        group_middle_nl.should_search_match("a\na", "a\na");
+        group_middle_nl.should_search_fail("a\na$");
+        group_middle_nl.should_search_fail("a\nab");
+        group_middle_nl.should_search_fail("a");
+    }
+    {
+        const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", basic);
+        middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
+        middle_nl_with_dollar.should_search_fail("a\nb");
+        middle_nl_with_dollar.should_search_fail("a$\nb$");
+        middle_nl_with_dollar.should_search_fail("a$\nbc");
+        middle_nl_with_dollar.should_search_fail("a");
+        middle_nl_with_dollar.should_search_fail("b");
+    }
+    {
+        const test_regex group_middle_nl_with_dollar(&g_regexTester, "\\(a$\nb\\)$", basic);
+        group_middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
+        group_middle_nl_with_dollar.should_search_fail("a\nb");
+        group_middle_nl_with_dollar.should_search_fail("a$\nb$");
+        group_middle_nl_with_dollar.should_search_fail("a$\nbc");
+        group_middle_nl_with_dollar.should_search_fail("a");
+        group_middle_nl_with_dollar.should_search_fail("b");
+    }
+}
+
+void test_gh_5362_grep() {
+    // test cases specific for grep mode
+    {
+        const test_regex middle_nl(&g_regexTester, "a\na$", grep);
+        middle_nl.should_search_match("a\na$", "a");
+        middle_nl.should_search_match("a\nab", "a");
+        middle_nl.should_search_match("a", "a");
+        middle_nl.should_search_fail("b");
+    }
+    {
+        // This regular expression is not accepted by POSIX grep, but currently the regex parser does not reject it.
+        // If the parser is changed to reject it, adjust this test case.
+        const test_regex group_middle_nl(&g_regexTester, "\\(a\na\\)$", grep);
+        group_middle_nl.should_search_match("a\na", "a\na");
+        group_middle_nl.should_search_fail("a\na$");
+        group_middle_nl.should_search_fail("a\nac");
+        group_middle_nl.should_search_fail("a");
+    }
+    {
+        const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", grep);
+        middle_nl_with_dollar.should_search_match("a$\nb", "b");
+        middle_nl_with_dollar.should_search_match("a\nb", "a");
+        middle_nl_with_dollar.should_search_match("ba", "a");
+        middle_nl_with_dollar.should_search_match("a", "a");
+        middle_nl_with_dollar.should_search_match("b", "b");
+        middle_nl_with_dollar.should_search_match("ab", "b");
+        middle_nl_with_dollar.should_search_fail("a$");
+        middle_nl_with_dollar.should_search_fail("ac");
+        middle_nl_with_dollar.should_search_fail("b$");
+        middle_nl_with_dollar.should_search_fail("bc");
+    }
+    {
+        // This regular expression is not accepted by POSIX grep, but currently the regex parser does not reject it.
+        // If the parser is changed to reject it, adjust this test case.
+        const test_regex group_middle_nl_with_dollar(&g_regexTester, "\\(a$\nb\\)$", grep);
+        group_middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
+        group_middle_nl_with_dollar.should_search_fail("a\nb");
+        group_middle_nl_with_dollar.should_search_fail("a$\nb$");
+        group_middle_nl_with_dollar.should_search_fail("a$\nbc");
+        group_middle_nl_with_dollar.should_search_fail("a");
+        group_middle_nl_with_dollar.should_search_fail("b");
+    }
+}
+
+void test_gh_5362() {
+    // GH-5362: `<regex>`: Properly parse dollar anchors in basic and grep mode
+    test_gh_5362_syntax_option(basic);
+    test_gh_5362_syntax_option(grep);
+
+    test_gh_5362_basic();
+    test_gh_5362_grep();
+}
+
 int main() {
     test_dev10_449367_case_insensitivity_should_work();
     test_dev11_462743_regex_collate_should_not_disable_regex_icase();
@@ -1208,6 +1397,7 @@ int main() {
     test_gh_5192();
     test_gh_5214();
     test_gh_5253();
+    test_gh_5362();
 
     return g_regexTester.result();
 }