Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 26 additions & 8 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,7 @@ private:
// lexing
[[noreturn]] void _Error(regex_constants::error_type);

bool _Is_esc() const;
bool _Is_esc(_FwdIt) const;
void _Trans();
void _Next();
void _Expect(_Meta_type, regex_constants::error_type);
Expand Down Expand Up @@ -3875,8 +3875,7 @@ template <class _FwdIt, class _Elem, class _RxTraits>
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Parser<_FwdIt, _Elem, _RxTraits>::_Is_esc() const { // assumes _Pat != _End
_FwdIt _Ch0 = _Pat;
bool _Parser<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End
return ++_Ch0 != _End
&& ((!(_L_flags & _L_nex_grp) && (*_Ch0 == _Meta_lpar || *_Ch0 == _Meta_rpar))
|| (!(_L_flags & _L_nex_rep) && (*_Ch0 == _Meta_lbr || *_Ch0 == _Meta_rbr)));
Expand All @@ -3897,7 +3896,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
}
switch (_Char) { // handle special cases
case _Meta_esc:
if (_Is_esc()) { // replace escape sequence
if (_Is_esc(_Pat)) { // replace escape sequence
_FwdIt _Ch0 = _Pat;
_Mchar = static_cast<_Meta_type>(_Char = *++_Ch0);
}
Expand Down Expand Up @@ -3941,9 +3940,28 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char

case _Meta_dlr:
{ // check if $ is special
_FwdIt _Ch0 = _Pat;
if ((_L_flags & _L_anch_rstr) && ++_Ch0 != _End && *_Ch0 != _Meta_nl) {
_Mchar = _Meta_chr;
_FwdIt _Next = _Pat;
if ((_L_flags & _L_anch_rstr) && ++_Next != _End) {
const bool _Escaped = *_Next == _Meta_esc && _Is_esc(_Next);
if (_Escaped) {
++_Next;
}

// Only the basic and grep grammars set _L_anch_rstr, so _L_alt_pipe and _L_nex_grp must be unset.
// Therefore, we don't need to handle "dollar followed by pipe '|' for alternation"
// or "dollar followed by non-escaped right parenthesis ')' closing a group" below.
_STL_INTERNAL_CHECK((_L_flags & (_L_alt_pipe | _L_nex_grp)) == 0);

const _Elem _Ch = *_Next;
const bool _Is_end_of_alternative =
((_L_flags & _L_alt_nl) && _Ch == _Meta_nl
&& _Disj_count == 0) // dollar followed by newline '\n' for alternation
|| (_Escaped && _Ch == _Meta_rpar
&& _Disj_count != 0); // dollar followed by (escaped) right parenthesis ')' closing a group

if (!_Is_end_of_alternative) {
_Mchar = _Meta_chr;
}
}

break;
Expand Down Expand Up @@ -3972,7 +3990,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
template <class _FwdIt, class _Elem, class _RxTraits>
void _Parser<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character
if (_Pat != _End) { // advance
if (*_Pat == _Meta_esc && _Is_esc()) {
if (*_Pat == _Meta_esc && _Is_esc(_Pat)) {
++_Pat;
}

Expand Down
192 changes: 191 additions & 1 deletion tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1031,7 +1031,6 @@ void test_gh_5165_grep() {
middle_nl_with_caret.should_search_fail("^a");
middle_nl_with_caret.should_search_fail("ca");
middle_nl_with_caret.should_search_fail("^b");
middle_nl_with_caret.should_search_fail("ca");
middle_nl_with_caret.should_search_fail("cb");
}
{
Expand Down Expand Up @@ -1171,6 +1170,196 @@ void test_gh_5253() {
g_regexTester.should_not_match("a", "()*");
}

void test_gh_5362_syntax_option(const syntax_option_type basic_or_grep) {
{
const test_regex ending_anchor(&g_regexTester, "meo[wW]$", basic_or_grep);
ending_anchor.should_search_match("kitten_meow", "meow");
ending_anchor.should_search_fail("homeowner");
}
{
const test_regex middle_anchor(&g_regexTester, "me$o[wW]", basic_or_grep);
middle_anchor.should_search_fail("kitten_meow");
middle_anchor.should_search_fail("homeowner");
middle_anchor.should_search_match("home$owner", "me$ow");
}
{
const test_regex double_dollars(&g_regexTester, "meo[wW]$$", basic_or_grep);
double_dollars.should_search_fail("kitten_meow");
double_dollars.should_search_fail("homeowner");
double_dollars.should_search_match("kitten_meow$", "meow$");
double_dollars.should_search_fail("kitten_meow$$");
double_dollars.should_search_fail("homeow$ner");
double_dollars.should_search_fail("homeow$$ner");
}

g_regexTester.should_not_match("me$ow", R"(\(me$\)o[wW])", basic_or_grep);
g_regexTester.should_not_match("meow", R"(\(me$\)o[wW])", basic_or_grep);

{
const test_regex singlegroup_anchor(&g_regexTester, R"(\(meo[wW]$\))", basic_or_grep);
singlegroup_anchor.should_search_match("kitten_meow", "meow");
singlegroup_anchor.should_search_fail("kitten_meow$");
singlegroup_anchor.should_search_fail("homeowner");
singlegroup_anchor.should_search_fail("homeow$ner");
}
{
const test_regex suffixedgroup_anchor(&g_regexTester, R"(\(meo[wW]$\).*)", basic_or_grep);
suffixedgroup_anchor.should_search_match("kitten_meow", "meow");
suffixedgroup_anchor.should_search_fail("kitten_meow$");
suffixedgroup_anchor.should_search_fail("homeowner");
suffixedgroup_anchor.should_search_fail("homeow$ner");
}
{
const test_regex firstgroup_anchor(&g_regexTester, R"(\(meo[wW]$\)\(.*\))", basic_or_grep);
firstgroup_anchor.should_search_match("kitten_meow", "meow");
firstgroup_anchor.should_search_fail("kitten_meow$");
firstgroup_anchor.should_search_fail("homeowner");
firstgroup_anchor.should_search_fail("homeow$ner");
}
{
const test_regex nested_anchor(&g_regexTester, R"(\(\(meo[wW]$\)$\).*)", basic_or_grep);
nested_anchor.should_search_match("kitten_meow", "meow");
nested_anchor.should_search_fail("kitten_meow$");
nested_anchor.should_search_fail("kitten_meow$$");
nested_anchor.should_search_fail("homeowner");
nested_anchor.should_search_fail("homeow$ner");
nested_anchor.should_search_fail("homeow$$ner");
}
{
const test_regex double_dollars(&g_regexTester, R"(\(meo[wW]$$\).*)", basic_or_grep);
double_dollars.should_search_fail("kitten_meow");
double_dollars.should_search_match("kitten_meow$", "meow$");
double_dollars.should_search_fail("kitten_meow$$");
double_dollars.should_search_fail("homeowner");
double_dollars.should_search_fail("homeow$ner");
double_dollars.should_search_fail("homeow$$ner");
}

// Validate that there is no special behavior near bars,
// as they are alternation operators in regex modes other than basic or grep.
{
const test_regex middle_bar(&g_regexTester, "a|a$", basic_or_grep);
middle_bar.should_search_match("a|a", "a|a");
middle_bar.should_search_fail("a|a$");
middle_bar.should_search_fail("a|ab");
middle_bar.should_search_fail("a");
}
{
const test_regex group_middle_bar(&g_regexTester, R"(\(a|a\)$)", basic_or_grep);
group_middle_bar.should_search_match("a|a", "a|a");
group_middle_bar.should_search_fail("a|a$");
group_middle_bar.should_search_fail("a|ab");
group_middle_bar.should_search_fail("a");
}
{
const test_regex middle_bar_with_dollar(&g_regexTester, "a$|b$", basic_or_grep);
middle_bar_with_dollar.should_search_match("a$|b", "a$|b");
middle_bar_with_dollar.should_search_fail("a|b");
middle_bar_with_dollar.should_search_fail("a$|b$");
middle_bar_with_dollar.should_search_fail("a$|bc");
middle_bar_with_dollar.should_search_fail("a");
middle_bar_with_dollar.should_search_fail("b");
}
{
const test_regex group_middle_bar_with_dollar(&g_regexTester, R"(\(a$|b\)$)", basic_or_grep);
group_middle_bar_with_dollar.should_search_match("a$|b", "a$|b");
group_middle_bar_with_dollar.should_search_fail("a|b");
group_middle_bar_with_dollar.should_search_fail("a$|b$");
group_middle_bar_with_dollar.should_search_fail("a$|bc");
group_middle_bar_with_dollar.should_search_fail("a");
group_middle_bar_with_dollar.should_search_fail("b");
}
}

void test_gh_5362_basic() {
// test cases specific for basic regular expressions
{
const test_regex middle_nl(&g_regexTester, "a\na$", basic);
middle_nl.should_search_match("a\na", "a\na");
middle_nl.should_search_fail("a\na$");
middle_nl.should_search_fail("a\nab");
middle_nl.should_search_fail("a");
}
{
const test_regex group_middle_nl(&g_regexTester, "\\(a\na\\)$", basic);
group_middle_nl.should_search_match("a\na", "a\na");
group_middle_nl.should_search_fail("a\na$");
group_middle_nl.should_search_fail("a\nab");
group_middle_nl.should_search_fail("a");
}
{
const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", basic);
middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
middle_nl_with_dollar.should_search_fail("a\nb");
middle_nl_with_dollar.should_search_fail("a$\nb$");
middle_nl_with_dollar.should_search_fail("a$\nbc");
middle_nl_with_dollar.should_search_fail("a");
middle_nl_with_dollar.should_search_fail("b");
}
{
const test_regex group_middle_nl_with_dollar(&g_regexTester, "\\(a$\nb\\)$", basic);
group_middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
group_middle_nl_with_dollar.should_search_fail("a\nb");
group_middle_nl_with_dollar.should_search_fail("a$\nb$");
group_middle_nl_with_dollar.should_search_fail("a$\nbc");
group_middle_nl_with_dollar.should_search_fail("a");
group_middle_nl_with_dollar.should_search_fail("b");
}
}

void test_gh_5362_grep() {
// test cases specific for grep mode
{
const test_regex middle_nl(&g_regexTester, "a\na$", grep);
middle_nl.should_search_match("a\na$", "a");
middle_nl.should_search_match("a\nab", "a");
middle_nl.should_search_match("a", "a");
middle_nl.should_search_fail("b");
}
{
// This regular expression is not accepted by POSIX grep, but currently the regex parser does not reject it.
// If the parser is changed to reject it, adjust this test case.
const test_regex group_middle_nl(&g_regexTester, "\\(a\na\\)$", grep);
group_middle_nl.should_search_match("a\na", "a\na");
group_middle_nl.should_search_fail("a\na$");
group_middle_nl.should_search_fail("a\nac");
group_middle_nl.should_search_fail("a");
}
{
const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", grep);
middle_nl_with_dollar.should_search_match("a$\nb", "b");
middle_nl_with_dollar.should_search_match("a\nb", "a");
middle_nl_with_dollar.should_search_match("ba", "a");
middle_nl_with_dollar.should_search_match("a", "a");
middle_nl_with_dollar.should_search_match("b", "b");
middle_nl_with_dollar.should_search_match("ab", "b");
middle_nl_with_dollar.should_search_fail("a$");
middle_nl_with_dollar.should_search_fail("ac");
middle_nl_with_dollar.should_search_fail("b$");
middle_nl_with_dollar.should_search_fail("bc");
}
{
// This regular expression is not accepted by POSIX grep, but currently the regex parser does not reject it.
// If the parser is changed to reject it, adjust this test case.
const test_regex group_middle_nl_with_dollar(&g_regexTester, "\\(a$\nb\\)$", grep);
group_middle_nl_with_dollar.should_search_match("a$\nb", "a$\nb");
group_middle_nl_with_dollar.should_search_fail("a\nb");
group_middle_nl_with_dollar.should_search_fail("a$\nb$");
group_middle_nl_with_dollar.should_search_fail("a$\nbc");
group_middle_nl_with_dollar.should_search_fail("a");
group_middle_nl_with_dollar.should_search_fail("b");
}
}

void test_gh_5362() {
// GH-5362: `<regex>`: Properly parse dollar anchors in basic and grep mode
test_gh_5362_syntax_option(basic);
test_gh_5362_syntax_option(grep);

test_gh_5362_basic();
test_gh_5362_grep();
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -1208,6 +1397,7 @@ int main() {
test_gh_5192();
test_gh_5214();
test_gh_5253();
test_gh_5362();

return g_regexTester.result();
}