diff --git a/stl/inc/regex b/stl/inc/regex index 3366345c261..e3110d4207b 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -119,6 +119,7 @@ namespace regex_constants { grep = 0x10, egrep = 0x20, _Gmask = 0x3F, + _Any_posix = basic | extended | grep | egrep | awk, icase = 0x0100, nosubs = 0x0200, @@ -3199,10 +3200,14 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_rep2(int _Min, int _Max, bool _Gre _Node_base* _Pos = _Current; if (_Pos->_Kind == _N_end_group || _Pos->_Kind == _N_end_capture) { _Pos = static_cast<_Node_end_group*>(_Pos)->_Back; - } - - if (_Min == 0 && _Max == 1) { // rewrite zero-or-one quantifiers as alternations to make the - // "simple loop" optimization more likely to engage + } else if (_Min == 0 && _Max == 1) { + // Rewrite zero-or-one quantifiers as alternations to make the + // "simple loop" optimization more likely to engage. + // + // GH-5490: This rewrite becomes observably incorrect + // if the subexpression contains capture groups, + // so we don't apply it if the subexpression is surrounded + // by a capturing or non-capturing group. 
_Node_endif* _End = new _Node_endif; _Node_if* _If_expr = new _Node_if(_End); _Node_if* _If_empty_str = new _Node_if(_End); @@ -3226,13 +3231,14 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_rep2(int _Min, int _Max, bool _Gre swap(_If_expr->_Next->_Prev, _If_empty_str->_Next->_Prev); // intentional ADL swap(_If_expr->_Next, _If_empty_str->_Next); // intentional ADL } - } else { - _Node_end_rep* _Node0 = new _Node_end_rep(); - _Node_rep* _Nx = new _Node_rep(_Greedy, _Min, _Max, _Node0, _Root->_Loops++); - _Node0->_Begin_rep = _Nx; - _Link_node(_Node0); - _Insert_node(_Pos, _Nx); + return; } + + _Node_end_rep* _Node0 = new _Node_end_rep(); + _Node_rep* _Nx = new _Node_rep(_Greedy, _Min, _Max, _Node0, _Root->_Loops++); + _Node0->_Begin_rep = _Nx; + _Link_node(_Node0); + _Insert_node(_Pos, _Nx); } template <class _FwdIt, class _Elem, class _RxTraits> @@ -3311,26 +3317,30 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node int _Ix = 0; _Tgt_state_t<_It> _St = _Tgt_state; - for (; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps - // GH-5365: We have to reset the capture groups from the second iteration on. - // We can avoid the reset for the first iteration - // because we know that a simple repetition was not encountered before. - if (_Ix > 0) { - _Tgt_state._Grp_valid = _St._Grp_valid; - } - - _It _Cur = _Tgt_state._Cur; + if (0 < _Node->_Min) { + // GH-5365: We can avoid resetting capture groups for the first iteration + // because we know that a simple repetition of this loop was not encountered before. 
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail - _Tgt_state = _St; return false; - } else if (_Cur == _Tgt_state._Cur) { - _Ix = _Node->_Min - 1; // skip matches that don't change state + } else if (_Tgt_state._Cur == _St._Cur) { // matches empty string + // loop is branchless, so it will only ever match empty strings + // -> skip all other matches as they don't change state and immediately try tail + return _Match_pat(_Node->_End_rep->_Next); + } else { // loop never matches the empty string + for (_Ix = 1; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps + // GH-5365: We have to reset the capture groups from the second iteration on. + _Tgt_state._Grp_valid = _St._Grp_valid; + if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail + return false; + } + } } } - _Tgt_state_t<_It> _Final = _Tgt_state; - bool _Matched0 = false; - _It _Saved_pos = _Tgt_state._Cur; + _Tgt_state_t<_It> _Final; + bool _Matched0 = false; + _It _Saved_pos = _Tgt_state._Cur; + bool _Done = false; if (_Match_pat(_Node->_End_rep->_Next)) { if (!_Greedy) { @@ -3342,32 +3352,58 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node _Matched0 = true; } - while (_Node->_Max == -1 || _Ix++ < _Node->_Max) { // try another rep/tail match + if (_Ix == 0 && _Node->_Max != 0) { _Tgt_state._Cur = _Saved_pos; _Tgt_state._Grp_valid = _St._Grp_valid; - if (!_Match_pat(_Node->_Next)) { - break; // rep match failed, quit loop - } - _It _Mid = _Tgt_state._Cur; - if (_Match_pat(_Node->_End_rep->_Next)) { - if (!_Greedy) { + if (!_Match_pat(_Node->_Next)) { // rep match failed, we are done + _Done = true; + } else if (_Saved_pos == _Tgt_state._Cur) { // match empty, try no more repetitions + _Done = true; + // we only potentially accept/try tail for POSIX + if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) { return true; // go with current match } + } else { + _Saved_pos = _Tgt_state._Cur; 
+ if (_Match_pat(_Node->_End_rep->_Next)) { + if (!_Greedy) { + return true; // go with current match + } - // record match and continue - _Final = _Tgt_state; - _Matched0 = true; + // record match and continue + _Final = _Tgt_state; + _Matched0 = true; + } } + _Ix = 1; + } - if (_Saved_pos == _Mid) { - break; // rep match ate no additional elements, quit loop - } + if (!_Done) { + while (_Node->_Max == -1 || _Ix++ < _Node->_Max) { // try another rep/tail match + _Tgt_state._Cur = _Saved_pos; + _Tgt_state._Grp_valid = _St._Grp_valid; + if (!_Match_pat(_Node->_Next) || _Tgt_state._Cur == _Saved_pos) { + break; // rep match failed, quit loop + } - _Saved_pos = _Mid; + // since loop is branchless, empty rep match is not possible at this point + _Saved_pos = _Tgt_state._Cur; + if (_Match_pat(_Node->_End_rep->_Next)) { + if (!_Greedy) { + return true; // go with current match + } + + // record match and continue + _Final = _Tgt_state; + _Matched0 = true; + } + } } - _Tgt_state = _Matched0 ? _Final : _St; + if (_Matched0) { // record final match + _Tgt_state = _Final; + } return _Matched0; } @@ -3381,61 +3417,56 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node, _It* _Loop_iter_sav = static_cast<_It*>(_Psav->_Loop_iter); bool _Progress = _Init_idx == 0 || *_Loop_iter_sav != _St._Cur; - if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) { - _Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail - } else if (_Init_idx < _Node->_Min) { // try a required rep - if (!_Progress) { - _Matched0 = _Match_pat(_Node->_End_rep->_Next); // empty, try tail - } else { // try another required match - _Psav->_Loop_idx = _Init_idx + 1; - _Psav->_Loop_iter = _STD addressof(_St._Cur); - _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first), - _Tgt_state._Grp_valid.end(), false); - _Matched0 = _Match_pat(_Node->_Next); - } - } else if (_Longest) { // longest, try any number of repetitions - - // match with no further 
repetition - _Matched0 = _Match_pat(_Node->_End_rep->_Next); - // match with at least one more repetition if last repetition made progress - if (_Progress) { + if (_Init_idx < _Node->_Min) { // try another required match + _Psav->_Loop_iter = _STD addressof(_St._Cur); + _Psav->_Loop_idx = _Progress ? _Init_idx + 1 : _Node->_Min; // try only one more match after an empty match + _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first), + _Tgt_state._Grp_valid.end(), false); + _Matched0 = _Match_pat(_Node->_Next); + } else if (_Init_idx == _Node->_Min || _Progress) { + if (0 <= _Node->_Max && _Node->_Max <= _Init_idx) { + _Matched0 = _Match_pat(_Node->_End_rep->_Next); // reps done, try tail + } else if (_Longest) { // longest, try any number of repetitions + + // match with no further repetition + _Matched0 = _Match_pat(_Node->_End_rep->_Next); + + // try to match with one more repetition _Tgt_state = _St; _Psav->_Loop_idx = _Init_idx + 1; _Psav->_Loop_iter = _STD addressof(_St._Cur); - if (_Match_pat(_Node->_Next)) { // always call _Match_pat, even when _Matched0 is already true _Matched0 = true; } - } - } else if (!_Greedy) { // not greedy, favor minimum number of reps - _Matched0 = _Match_pat(_Node->_End_rep->_Next); - if (!_Matched0 && _Progress) { // tail failed, try another rep - _Tgt_state = _St; - _Psav->_Loop_idx = _Init_idx + 1; - _Psav->_Loop_iter = _STD addressof(_St._Cur); - _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first), - _Tgt_state._Grp_valid.end(), false); - _Matched0 = _Match_pat(_Node->_Next); - } - } else { // greedy, favor maximum number of reps - if (_Progress) { // try another rep + } else if (!_Greedy) { // not greedy, favor minimum number of reps + _Matched0 = _Match_pat(_Node->_End_rep->_Next); + if (!_Matched0) { // tail failed, try another rep + _Tgt_state = _St; + _Psav->_Loop_idx = _Init_idx + 1; + _Psav->_Loop_iter = _STD addressof(_St._Cur); + _STD fill(_Tgt_state._Grp_valid.begin() + 
static_cast<ptrdiff_t>(_Psav->_Group_first), + _Tgt_state._Grp_valid.end(), false); + _Matched0 = _Match_pat(_Node->_Next); + } + } else { // greedy, favor maximum number of reps, + // so try another rep _Psav->_Loop_idx = _Init_idx + 1; _Psav->_Loop_iter = _STD addressof(_St._Cur); _STD fill(_Tgt_state._Grp_valid.begin() + static_cast<ptrdiff_t>(_Psav->_Group_first), _Tgt_state._Grp_valid.end(), false); _Matched0 = _Match_pat(_Node->_Next); - } - if ((_Progress || 1 >= _Init_idx) && !_Matched0) { // rep failed, try tail - _Psav->_Loop_idx = _Loop_idx_sav; - _Psav->_Loop_iter = _Loop_iter_sav; - _Tgt_state = _St; - _Matched0 = _Match_pat(_Node->_End_rep->_Next); + if (!_Matched0) { // rep failed, try tail + _Psav->_Loop_idx = _Loop_idx_sav; + _Psav->_Loop_iter = _Loop_iter_sav; + _Tgt_state = _St; + _Matched0 = _Match_pat(_Node->_End_rep->_Next); + } } - } - - if (!_Matched0) { - _Tgt_state = _St; + } else if (_Init_idx == 1 && (_Sflags & regex_constants::_Any_posix)) { + // POSIX allows an empty repetition if the subexpression is matched only once, + // so try tail + _Matched0 = _Match_pat(_Node->_End_rep->_Next); } _Psav->_Loop_idx = _Loop_idx_sav; @@ -3456,9 +3487,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* // No capture group reset is performed for POSIX regexes, // so we prevent any reset by setting the first capture group to the number of capture groups _Ncap. 
if (_Psav->_Group_first == 0) { - constexpr auto _Any_posix = regex_constants::basic | regex_constants::extended | regex_constants::grep - | regex_constants::egrep | regex_constants::awk; - if ((_Sflags & _Any_posix) || !_Find_first_inner_capture_group(_Node->_Next, _Psav)) { + if ((_Sflags & regex_constants::_Any_posix) || !_Find_first_inner_capture_group(_Node->_Next, _Psav)) { _Psav->_Group_first = _Ncap; } } @@ -3833,10 +3862,8 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N if (_Tgt_state._Cur == _End) { _Failed = true; } else { - constexpr auto _Any_posix = regex_constants::basic | regex_constants::extended | regex_constants::grep - | regex_constants::egrep | regex_constants::awk; const _Elem _Ch = *_Tgt_state._Cur; - if (_Sflags & _Any_posix) { + if (_Sflags & regex_constants::_Any_posix) { if (_Ch == _Elem()) { _Failed = true; } @@ -4923,10 +4950,18 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( _Calculate_loop_simplicity(static_cast<_Node_assert*>(_Nx)->_Child, nullptr, nullptr); break; case _N_rep: - // _Node_rep inside another _Node_rep makes both not simple + // _Node_rep inside another _Node_rep makes both not simple if _Outer_rep can be repeated more than once + // because _Matcher2::_Do_rep0() does not reset capture group boundaries when control is returned to it. + // If _Outer_rep can repeat at most once, we have to analyze the structure of the inner loop. 
if (_Outer_rep) { - _Outer_rep->_Simple_loop = 0; - static_cast<_Node_rep*>(_Nx)->_Simple_loop = 0; + _Outer_rep->_Simple_loop = 0; + auto _Inner_rep = static_cast<_Node_rep*>(_Nx); + if (_Outer_rep->_Max >= 0 && _Outer_rep->_Max <= 1) { + _Calculate_loop_simplicity(_Inner_rep->_Next, _Inner_rep->_End_rep->_Next, _Inner_rep); + _Nx = _Inner_rep->_End_rep; + } else { + _Inner_rep->_Simple_loop = 0; + } } else { _Outer_rep = static_cast<_Node_rep*>(_Nx); } diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index 36c8ef1b8ac..59c4f14cc99 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -1698,6 +1698,197 @@ void test_gh_5377() { } } +void test_gh_5490() { + // GH-5490: Optional empty repetitions are illegal + + // ECMA-262 15.10.2.5 "Term": + // "If min is zero and y's endIndex is equal to x's endIndex, then return failure." + // So if no additional repetition is required due to minimum requirements, the match should be rejected. + + // Similarly, POSIX 9.3.6 and 9.4.6 state that a null expression can only be matched if this is the only match or it + // is necessary to satisfy the minimum number of repetitions. + // Note the subtle difference that the empty match is allowed if it is the only match. 
+ for (string pattern : {"()*", "()?", "()*?", "()??", "(){0,}", "(){0,}?", "(){0,1}", "(){0,1}?"}) { + test_regex quantified_empty_regex_ecma(&g_regexTester, pattern, ECMAScript); + quantified_empty_regex_ecma.should_search_match_capture_groups("", "", match_default, {{-1, -1}}); + quantified_empty_regex_ecma.should_search_match_capture_groups("b", "", match_default, {{-1, -1}}); + + // empty groups are not allowed in EREs + } + + for (string pattern : {"()+", "()+?", "(){2,}?", "(){1}"}) { + test_regex quantified_empty_regex_ecma(&g_regexTester, pattern, ECMAScript); + quantified_empty_regex_ecma.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + quantified_empty_regex_ecma.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + + // empty groups are not allowed in EREs + } + + for (auto option : {basic, grep}) { + test_regex quantified_empty_regex_bre(&g_regexTester, R"(\(\)*)", option); + quantified_empty_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + quantified_empty_regex_bre.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + } + + for (auto option : {ECMAScript, extended, egrep, awk}) { + test_regex simple_sequence_regex_ecma_or_ere(&g_regexTester, "(ab)*", option); + simple_sequence_regex_ecma_or_ere.should_search_match_capture_groups("", "", match_default, {{-1, -1}}); + simple_sequence_regex_ecma_or_ere.should_search_match_capture_groups("b", "", match_default, {{-1, -1}}); + simple_sequence_regex_ecma_or_ere.should_search_match_capture_groups("ababcc", "abab", match_default, {{2, 4}}); + } + + for (auto option : {basic, grep}) { + test_regex simple_sequence_regex_bre(&g_regexTester, R"(\(ab\)*)", option); + simple_sequence_regex_bre.should_search_match_capture_groups("", "", match_default, {{-1, -1}}); + simple_sequence_regex_bre.should_search_match_capture_groups("b", "", match_default, {{-1, -1}}); + 
simple_sequence_regex_bre.should_search_match_capture_groups("ababcc", "abab", match_default, {{2, 4}}); + } + + for (string pattern : {"(ab(?=ab))*", "(ab(?!cc))*"}) { + test_regex nested_assertion_regex_ecma(&g_regexTester, pattern, ECMAScript); + nested_assertion_regex_ecma.should_search_match_capture_groups("", "", match_default, {{-1, -1}}); + nested_assertion_regex_ecma.should_search_match_capture_groups("b", "", match_default, {{-1, -1}}); + nested_assertion_regex_ecma.should_search_match_capture_groups("ababcc", "ab", match_default, {{0, 2}}); + nested_assertion_regex_ecma.should_search_match_capture_groups("abababcc", "abab", match_default, {{2, 4}}); + } + + for (string pattern : {"(a*)*", "(a?)*", "(a?)?"}) { + test_regex nested_quantifier_regex_ecma(&g_regexTester, pattern, ECMAScript); + nested_quantifier_regex_ecma.should_search_match_capture_groups("", "", match_default, {{-1, -1}}); + nested_quantifier_regex_ecma.should_search_match_capture_groups("b", "", match_default, {{-1, -1}}); + nested_quantifier_regex_ecma.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + + for (auto option : {extended, egrep, awk}) { + test_regex nested_quantifier_regex_ere(&g_regexTester, pattern, option); + nested_quantifier_regex_ere.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + nested_quantifier_regex_ere.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + nested_quantifier_regex_ere.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {R"(\(a*\)*)", R"(\(a\{0,1\}\)*)", R"(\(a\{0,1\}\)\{0,1\})"}) { + for (auto option : {basic, grep}) { + test_regex nested_quantifier_regex_bre(&g_regexTester, pattern, option); + nested_quantifier_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + nested_quantifier_regex_bre.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + 
nested_quantifier_regex_bre.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {"(a*)+", "(a?)+"}) { + for (auto option : {ECMAScript, extended, egrep, awk}) { + test_regex plus_quantifier_regex_ecma_or_ere(&g_regexTester, pattern, option); + plus_quantifier_regex_ecma_or_ere.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + plus_quantifier_regex_ecma_or_ere.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + plus_quantifier_regex_ecma_or_ere.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {R"(\(a*\)\{1,\})", R"(\(a\{0,1\}\)\{1,\})"}) { + for (auto option : {basic, grep}) { + test_regex plus_quantifier_regex_bre(&g_regexTester, pattern, option); + plus_quantifier_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + plus_quantifier_regex_bre.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + plus_quantifier_regex_bre.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {"(a*){1}", "(a?){1}"}) { + for (auto option : {ECMAScript, extended, egrep, awk}) { + test_regex repeat_once_regex_ecma_or_ere(&g_regexTester, pattern, option); + repeat_once_regex_ecma_or_ere.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeat_once_regex_ecma_or_ere.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + repeat_once_regex_ecma_or_ere.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {R"(\(a*\)\{1\})", R"(\(a\{0,1\}\)\{1\})"}) { + for (auto option : {basic, grep}) { + test_regex repeat_once_regex_bre(&g_regexTester, pattern, option); + repeat_once_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeat_once_regex_bre.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + // leftmost-longest 
rule according to Boost semantics + repeat_once_regex_bre.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {"(a*){2}", "(a?){2}"}) { + test_regex repeat_twice_regex_ecma(&g_regexTester, pattern, ECMAScript); + repeat_twice_regex_ecma.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeat_twice_regex_ecma.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + repeat_twice_regex_ecma.should_search_match_capture_groups("a", "a", match_default, {{1, 1}}); + + for (auto option : {extended, egrep, awk}) { + test_regex repeat_twice_regex_ere(&g_regexTester, pattern, option); + repeat_twice_regex_ere.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeat_twice_regex_ere.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + // leftmost-longest rule according to Boost semantics + repeat_twice_regex_ere.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {R"(\(a*\)\{2\})", R"(\(a\{0,1\}\)\{2\})"}) { + for (auto option : {basic, grep}) { + test_regex repeat_twice_regex_bre(&g_regexTester, pattern, option); + repeat_twice_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeat_twice_regex_bre.should_search_match_capture_groups("b", "", match_default, {{0, 0}}); + // leftmost-longest rule according to Boost semantics + repeat_twice_regex_bre.should_search_match_capture_groups("a", "a", match_default, {{0, 1}}); + } + } + + for (string pattern : {"(a?a?){2}", "(a?a?)+"}) { + test_regex repeated_double_question_regex_ecma(&g_regexTester, pattern, ECMAScript); + repeated_double_question_regex_ecma.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeated_double_question_regex_ecma.should_search_match_capture_groups("bbb", "", match_default, {{0, 0}}); + repeated_double_question_regex_ecma.should_search_match_capture_groups("aaa", "aaa", 
match_default, {{2, 3}}); + + for (auto option : {extended, egrep, awk}) { + test_regex repeated_double_question_regex_ere(&g_regexTester, pattern, option); + repeated_double_question_regex_ere.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeated_double_question_regex_ere.should_search_match_capture_groups("bbb", "", match_default, {{0, 0}}); + // leftmost-longest rule according to Boost semantics + repeated_double_question_regex_ere.should_search_match_capture_groups( + "aaa", "aaa", match_default, {{1, 3}}); + } + } + + for (string pattern : {R"(\(a\{0,1\}a\{0,1\}\)\{2\})", R"(\(a\{0,1\}a\{0,1\}\)\{1,\})"}) { + for (auto option : {basic, grep}) { + test_regex repeated_double_question_regex_bre(&g_regexTester, pattern, option); + repeated_double_question_regex_bre.should_search_match_capture_groups("", "", match_default, {{0, 0}}); + repeated_double_question_regex_bre.should_search_match_capture_groups("bbb", "", match_default, {{0, 0}}); + // leftmost-longest rule according to Boost semantics + repeated_double_question_regex_bre.should_search_match_capture_groups( + "aaa", "aaa", match_default, {{1, 3}}); + } + } + + { + test_regex backref_ecma(&g_regexTester, R"(a(b?)+c\1d)", ECMAScript); + backref_ecma.should_search_fail("abcd"); + backref_ecma.should_search_match_capture_groups("acd", "acd", match_default, {{1, 1}}); + backref_ecma.should_search_match_capture_groups("abcbd", "abcbd", match_default, {{1, 2}}); + } + + for (auto option : {basic, grep}) { + test_regex backref_bre(&g_regexTester, R"(a\(b\{0,1\}\)\{1,\}c\1d)", option); + backref_bre.should_search_fail("abcd"); + backref_bre.should_search_match_capture_groups("acd", "acd", match_default, {{1, 1}}); + backref_bre.should_search_match_capture_groups("abcbd", "abcbd", match_default, {{1, 2}}); + } + + { + test_regex backref_min_repeat_ecma(&g_regexTester, R"((a?){3,4}b\1c)", ECMAScript); + backref_min_repeat_ecma.should_search_match_capture_groups("aabc", "aabc", 
match_default, {{2, 2}}); + } + + for (auto option : {basic, grep}) { + test_regex backref_min_repeat_bre(&g_regexTester, R"(\(a\{0,1\}\)\{3,4\}b\1c)", option); + backref_min_repeat_bre.should_search_match_capture_groups("aabc", "aabc", match_default, {{2, 2}}); + } +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -1744,6 +1935,7 @@ int main() { test_gh_5371(); test_gh_5374(); test_gh_5377(); + test_gh_5490(); return g_regexTester.result(); } diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp index d52371192de..83706bb66bc 100644 --- a/tests/tr1/tests/regex2/test.cpp +++ b/tests/tr1/tests/regex2/test.cpp @@ -741,7 +741,8 @@ static const regex_test tests[] = { {__LINE__, T("a(bbb+|bb+|b)b"), T("abbb"), "2 0 4 1 3", NOT_BG}, {__LINE__, T("a(bbb+|bb+|b)bb"), T("abbb"), "2 0 4 1 2", NOT_BG}, {__LINE__, T("(.*).*"), T("abcdef"), "2 0 6 0 6", NOT_BG}, - {__LINE__, T("(a*)*"), T("bc"), "2 0 0 0 0", NOT_BG}, + {__LINE__, T("(a*)*"), T("bc"), "2 0 0 -1 -1", ECMA}, + {__LINE__, T("(a*)*"), T("bc"), "2 0 0 0 0", EEA}, {__LINE__, T("a(b|c)*d"), T("ad"), "2 0 2 -1 -1", NOT_BG}, {__LINE__, T("a(b|c)*d"), T("abcd"), "2 0 4 2 3", NOT_BG}, {__LINE__, T("a(b|c)+d"), T("abd"), "2 0 3 1 2", NOT_BG},