From a192d76e5516576769d2a8dd110338b79dd46429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sat, 29 Mar 2025 18:15:40 +0100 Subject: [PATCH 01/14] ``: Revise parsing of escape sequences --- stl/inc/regex | 97 +- tests/std/test.lst | 1 + .../GH_005244_regex_escape_sequences/env.lst | 4 + .../GH_005244_regex_escape_sequences/test.cpp | 874 ++++++++++++++++++ tests/tr1/tests/regex2/test.cpp | 73 +- 5 files changed, 974 insertions(+), 75 deletions(-) create mode 100644 tests/std/tests/GH_005244_regex_escape_sequences/env.lst create mode 100644 tests/std/tests/GH_005244_regex_escape_sequences/test.cpp diff --git a/stl/inc/regex b/stl/inc/regex index 17fd2ee72c5..4968917f9c2 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1733,15 +1733,15 @@ private: bool _OctalDigits(); void _Do_ex_class(_Meta_type); bool _CharacterClassEscape(bool); - _Prs_ret _ClassEscape2(); + _Prs_ret _ClassEscape3(); _Prs_ret _ClassAtom(); void _ClassRanges(); void _CharacterClass(); - bool _IdentityEscape(); - bool _IsIdentityEscape() const; + bool _IdentityEscape(bool); + bool _IsIdentityEscape(bool) const; bool _Do_ffn(_Elem); bool _Do_ffnx(_Elem); - bool _CharacterEscape(); + bool _CharacterEscape(bool); void _AtomEscape(); void _Do_capture_group(); void _Do_noncapture_group(); @@ -1781,7 +1781,7 @@ enum _Lang_flags { // describe language properties _L_esc_uni = 0x00000800, // has Unicode escape sequences _L_esc_hex = 0x00001000, // has hexadecimal escape sequences _L_esc_oct = 0x00002000, // has octal escape sequences - _L_esc_bsl = 0x00004000, // has escape backslash in character classes + _L_esc_bsp = 0x00004000, // has backspace escape in character classes _L_esc_ffnx = 0x00008000, // has extra file escapes (\a and \b) _L_esc_ffn = 0x00010000, // has limited file escapes (\[fnrtv]) _L_esc_wsd = 0x00020000, // has w, s, and d character set escapes @@ -4093,36 +4093,33 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // } template 
-_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape2() { // check for class escape - if ((_L_flags & _L_esc_bsl) && _Char == _Esc_bsl) { // handle escape backslash if allowed - _Val = _Esc_bsl; +_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape3() { // check for class escape + if ((_L_flags & _L_esc_bsp) && _Char == _Esc_ctrl_b) { // handle backspace escape _Next(); + _Val = _Meta_bsp; return _Prs_chr; - } else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) { - return _Prs_set; - } else if (_DecimalDigits2(regex_constants::error_escape)) { // check for invalid value - if (_Val != 0) { + } else if ((_L_flags & (_L_bzr_chr | _L_bckr)) + && (_Val = _Traits.value(_Char, 10)) != -1) { // handle \0 and reject other escaped decimal literals + _Next(); + if (!(_L_flags & _L_bzr_chr) || _Val != 0 || _Traits.value(_Char, 10) != -1) { _Error(regex_constants::error_escape); } return _Prs_chr; + } else if (_CharacterEscape(true)) { + return _Prs_chr; + } else if ((_L_flags & _L_esc_wsd) && _CharacterClassEscape(false)) { + return _Prs_set; } - return _CharacterEscape() ? 
_Prs_chr : _Prs_none; + + _Error(regex_constants::error_escape); } template _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom() { // check for class atom - if (_Mchar == _Meta_esc) { // check for valid escape sequence + if (_Mchar == _Meta_esc && (_L_flags & (_L_grp_esc | _L_ident_awk))) { // check for valid escape sequence _Next(); - if (_L_flags & _L_grp_esc) { - return _ClassEscape2(); - } else if ((_L_flags & _L_esc_ffn && _Do_ffn(_Char)) - || (_L_flags & _L_esc_ffnx && _Do_ffnx(_Char))) { // advance to next character - _Next(); - return _Prs_chr; - } - _Val = _Meta_esc; - return _Prs_chr; + return _ClassEscape3(); } else if (_Mchar == _Meta_lsq) { // check for valid delimited expression _Next(); if (_Mchar == _Meta_colon || _Mchar == _Meta_equal || _Mchar == _Meta_dot) { // handle delimited expression @@ -4278,7 +4275,8 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunct } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape() const { // check for valid identity escape +bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape( + bool _In_character_class) const { // check for valid identity escape if (_L_flags & _L_ident_ECMA) { // ECMAScript identity escape characters switch (_Char) { @@ -4296,26 +4294,28 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape() const { // check for } switch (_Char) { + case _Meta_esc: + // BRE, ERE, awk identity escape characters (anywhere in awk) + return true; case _Meta_dot: case _Meta_lsq: - case _Meta_esc: case _Meta_star: - case _Meta_bar: case _Meta_caret: case _Meta_dlr: - // BRE, ERE, awk identity escape characters - return true; + // BRE, ERE, awk identity escape characters (outside character classes only) + return !_In_character_class; case _Meta_lpar: case _Meta_rpar: + case _Meta_bar: case _Meta_plus: case _Meta_query: case _Meta_lbr: case _Meta_rbr: - // additional ERE identity escape characters - return (_L_flags & _L_ident_ERE) != 0; + // additional ERE 
identity escape characters (outside character classes only) + return (_L_flags & _L_ident_ERE) != 0 && !_In_character_class; case '"': case '/': - // additional awk identity escape characters + // additional awk identity escape characters (anywhere) return (_L_flags & _L_ident_awk) != 0; default: return false; @@ -4323,8 +4323,9 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape() const { // check for } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape() { // check whether an escape is valid, and process it if so - if (_IsIdentityEscape()) { +bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape( + bool _In_character_class) { // check whether an escape is valid, and process it if so + if (_IsIdentityEscape(_In_character_class)) { _Val = _Char; _Next(); return true; @@ -4366,7 +4367,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the r } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid character escape +bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_class) { // check for valid character escape if (_Mchar == _Meta_eos) { _Error(regex_constants::error_escape); } @@ -4392,7 +4393,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid _Error(regex_constants::error_escape); } } else { - return _IdentityEscape(); + return _IdentityEscape(_In_character_class); } if (_STD _Max_limit() < static_cast(_Val)) { @@ -4406,22 +4407,24 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape() { // check for valid template void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape constexpr int _Bre_max_backref_digits = 1; - if ((_L_flags & _L_bckr) - && _DecimalDigits2(regex_constants::error_backref, - (_L_flags & _L_lim_bckr) ? 
_Bre_max_backref_digits : INT_MAX)) { // check for valid back reference - if (_Val == 0) { // handle \0 - if (!(_L_flags & _L_bzr_chr)) { - _Error(regex_constants::error_escape); - } else { - _Nfa._Add_char2(static_cast<_Elem>(_Val)); - } + if ((_L_flags & _L_bzr_chr) && _Traits.value(_Char, 10) == 0) { // handle \0 + _Next(); + if (_Traits.value(_Char, 10) != -1) { + _Error(regex_constants::error_escape); + } + _Nfa._Add_char2(_Elem{}); + } else if ((_L_flags & _L_bckr) + && _DecimalDigits2(regex_constants::error_backref, + (_L_flags & _L_lim_bckr) ? _Bre_max_backref_digits : INT_MAX)) { // check for valid back reference + if (_Val == 0) { + _Error(regex_constants::error_escape); } else if (_Grp_idx < static_cast(_Val) || _Finished_grps.size() <= static_cast(_Val) || !_Finished_grps[static_cast(_Val)]) { _Error(regex_constants::error_backref); } else { _Nfa._Add_backreference(static_cast(_Val)); } - } else if (_CharacterEscape()) { + } else if (_CharacterEscape(false)) { _Nfa._Add_char2(static_cast<_Elem>(_Val)); } else if (!(_L_flags & _L_esc_wsd) || !_CharacterClassEscape(true)) { _Error(regex_constants::error_escape); @@ -4526,7 +4529,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte _Error(regex_constants::error_badrepeat); } else if (_Mchar == _Meta_rbr && !(_L_flags & _L_paren_bal)) { _Error(regex_constants::error_brace); - } else if (_Mchar == _Meta_rsq && !(_L_flags & _L_paren_bal)) { + } else if (_Mchar == _Meta_rsq && !(_L_flags & (_L_paren_bal | _L_brk_rstr))) { _Error(regex_constants::error_brack); } else { // add character _Nfa._Add_char2(_Char); @@ -4655,7 +4658,7 @@ _Parser<_FwdIt, _Elem, _RxTraits>::_Parser( : _Pat(_Pfirst), _Begin(_Pfirst), _End(_Plast), _Nfa(_Tr, _Fx), _Traits(_Tr), _Flags(_Fx) { constexpr unsigned int _ECMA_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_nc_grp | _L_asrt_gen - | _L_asrt_wrd | _L_bckr | _L_ngr_rep | _L_esc_uni | _L_esc_hex | _L_esc_bsl + | _L_asrt_wrd | _L_bckr 
| _L_ngr_rep | _L_esc_uni | _L_esc_hex | _L_esc_bsp | _L_esc_ffn | _L_esc_wsd | _L_esc_ctrl | _L_bzr_chr | _L_grp_esc | _L_ident_ECMA | _L_empty_grp; diff --git a/tests/std/test.lst b/tests/std/test.lst index 079235d8159..292e4255c97 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -256,6 +256,7 @@ tests\GH_004929_internal_tag_constructors tests\GH_004930_char_traits_user_specialization tests\GH_005090_stl_hardening tests\GH_005204_regex_collating_ranges +tests\GH_005244_regex_escape_sequences tests\GH_005315_destructor_tombstones tests\LWG2381_num_get_floating_point tests\LWG2597_complex_branch_cut diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/env.lst b/tests/std/tests/GH_005244_regex_escape_sequences/env.lst new file mode 100644 index 00000000000..19f025bd0e6 --- /dev/null +++ b/tests/std/tests/GH_005244_regex_escape_sequences/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp new file mode 100644 index 00000000000..133444214f8 --- /dev/null +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -0,0 +1,874 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +#include + +using namespace std; +using namespace std::regex_constants; + +regex_fixture g_regexTester; + +template +class test_regex_traits { +private: + using rx_traits = regex_traits; + +public: + using char_type = typename rx_traits::char_type; + using string_type = typename rx_traits::string_type; + using locale_type = typename rx_traits::locale_type; + using char_class_type = typename rx_traits::char_class_type; + + // TRANSITION, GH-995 + using _Uelem = typename rx_traits::_Uelem; + static constexpr auto _Ch_upper = rx_traits::_Ch_upper; + static constexpr auto _Ch_alpha = rx_traits::_Ch_alpha; + + test_regex_traits() = default; + + static size_t length(const charT* p) { + return rx_traits::length(p); + } + + charT translate(const charT c) const { + return inner.translate(c); + } + + charT translate_nocase(const charT c) const { + return inner.translate_nocase(c); + } + + template + string_type transform(FwdIt first, FwdIt last) const { + return inner.transform(first, last); + } + + template + string_type transform_primary(FwdIt first, FwdIt last) const { + return inner.transform_primary(first, last); + } + + template + char_class_type lookup_classname(FwdIt first, FwdIt last, bool icase = false) const { + FwdIt next = first; + ++next; + if (next == last && (*first == 'z' || *first == 'Z')) { + const charT space_class = 's'; + return inner.lookup_classname(&space_class, &space_class + 1, icase); + } + return inner.lookup_classname(first, last, icase); + } + + bool isctype(charT c, char_class_type f) const { + return inner.isctype(c, f); + } + + int value(charT ch, int radix) const { + return inner.value(ch, radix); + } + +private: + regex_traits inner; +}; + +template +void check_match(const string& subject, const string& pattern, const Rx& re, match_flag_type flags, bool matches) { + if (regex_match(subject, re, flags) != matches) { + printf(R"(Expected regex_match("%s", 
regex("%s", 0x%X)) to be %s.)", subject.c_str(), pattern.c_str(), + re.flags(), matches ? "true" : "false"); + g_regexTester.fail_regex(); + } +} + +template +void check_match(const string& subject, const string& pattern, const Rx& re, match_flag_type flags = match_default) { + check_match(subject, pattern, re, flags, true); +} + +template +void check_no_match(const string& subject, const string& pattern, const Rx& re, match_flag_type flags = match_default) { + check_match(subject, pattern, re, flags, false); +} + +void check_atomescape_controlescape(string expected, string c, syntax_option_type option) { + string pattern = "\\" + c; + g_regexTester.should_match(expected, pattern, option); + g_regexTester.should_not_match("g", pattern, option); + g_regexTester.should_not_match(c, pattern, option); + g_regexTester.should_not_match("\\", pattern, option); + g_regexTester.should_not_match(pattern, pattern, option); +} + +void check_classescape_controlescape(string expected, string c, syntax_option_type option) { + string pattern = "[\\" + c + "]"; + g_regexTester.should_match(expected, pattern, option); + g_regexTester.should_not_match("g", pattern, option); + g_regexTester.should_not_match(c, pattern, option); + g_regexTester.should_not_match("\\", pattern, option); +} + +void check_atomescape_identityescape(string c, syntax_option_type option) { + string pattern = "\\" + c; + g_regexTester.should_match(c, pattern, option); + g_regexTester.should_not_match("g", pattern, option); + g_regexTester.should_not_match("\\", pattern, option); + g_regexTester.should_not_match(pattern, pattern, option); +} + +void check_classescape_identityescape(string c, syntax_option_type option) { + string pattern = "[\\" + c + "]"; + g_regexTester.should_match(c, pattern, option); + g_regexTester.should_not_match("g", pattern, option); + g_regexTester.should_not_match("\\", pattern, option); +} + +void check_classescape_noescape(string c, syntax_option_type option) { + string pattern = 
"[\\" + c + "]"; + g_regexTester.should_match(c, pattern, option); + g_regexTester.should_match("\\", pattern, option); + g_regexTester.should_not_match("g", pattern, option); + g_regexTester.should_not_match("\\" + c, pattern, option); +} + +void test_gh_5244_atomescape_ecmascript() { + + // AtomEscape :: DecimalEscape + // ECMAScript standard says: + // \ followed by decimal whose first digit is not zero is a backreference. + // It is an error if a backreference does not refer to a capture group. + // \0 refers to literal NUL and must not be followed by another digit. + + // literal NUL tests + g_regexTester.should_match("\0"s, R"(\0)", ECMAScript); + g_regexTester.should_not_match("0", R"(\0)", ECMAScript); + g_regexTester.should_not_match("", R"(\0)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\0)", ECMAScript); + g_regexTester.should_throw(R"(\00)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\01)", error_escape, ECMAScript); + g_regexTester.should_throw(R"((a)\01)", error_escape, ECMAScript); + + // backreference tests + g_regexTester.should_throw(R"(\1)", error_backref, ECMAScript); + g_regexTester.should_throw(R"((a)\10)", error_backref, ECMAScript); + g_regexTester.should_match("aa", R"((a)\1)", ECMAScript); + g_regexTester.should_not_match("aa\\1", R"((a)\1)", ECMAScript); + g_regexTester.should_not_match("aa\\", R"((a)\1)", ECMAScript); + g_regexTester.should_not_match("aa1", R"((a)\1)", ECMAScript); + g_regexTester.should_match("aaaaaaaaabb", R"((.)(.)(.)(.)(.)(.)(.)(.)(.)(b)\10)"); + g_regexTester.should_not_match("aaaaaaaaaba", R"((.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\10)"); + g_regexTester.should_not_match("aaaaaaaaaba0", R"((.)(.)(.)(.)(.)(.)(.)(.)(.)(.)\10)"); + + // AtomEscape :: CharacterEscape :: ControlEscape + check_atomescape_controlescape("\f", "f", ECMAScript); + check_atomescape_controlescape("\n", "n", ECMAScript); + check_atomescape_controlescape("\r", "r", ECMAScript); + check_atomescape_controlescape("\t", "t", 
ECMAScript); + check_atomescape_controlescape("\v", "v", ECMAScript); + + // AtomEscape :: CharacterEscape :: 'c' ControlLetter + g_regexTester.should_match("\x1", R"(\cA)", ECMAScript); + g_regexTester.should_match("\x1a", R"(\cZ)", ECMAScript); + g_regexTester.should_match("\x1", R"(\ca)", ECMAScript); + g_regexTester.should_match("\x1a", R"(\cz)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\ca)", ECMAScript); + g_regexTester.should_not_match("c", R"(\ca)", ECMAScript); + g_regexTester.should_not_match("ca", R"(\ca)", ECMAScript); + g_regexTester.should_throw(R"(\c0)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\c)", error_escape, ECMAScript); + + // AtomEscape :: CharacterEscape :: HexEscapeSequence + g_regexTester.should_match("\x00"s, R"(\x00)", ECMAScript); + g_regexTester.should_not_match("\\x00", R"(\x00)", ECMAScript); + // clang-format off + g_regexTester.should_match("\x00""0"s, R"(\x000)", ECMAScript); + // clang-format on + g_regexTester.should_match("A", R"(\x41)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\x41)", ECMAScript); + g_regexTester.should_match("\xff", R"(\xff)", ECMAScript); + g_regexTester.should_throw(R"(\x)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\x0)", error_escape, ECMAScript); + + // AtomEscape :: CharacterEscape :: UnicodeEscapeSequence + g_regexTester.should_match("\u0000"s, R"(\u0000)", ECMAScript); + g_regexTester.should_not_match("\\u0000", R"(\u0000)", ECMAScript); + // clang-format off + g_regexTester.should_match("\u0000""0"s, R"(\u00000)", ECMAScript); + // clang-format on + g_regexTester.should_match("A", R"(\u0041)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\u0041)", ECMAScript); + g_regexTester.should_throw(R"(\uffff)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\u2600)", error_escape, ECMAScript); // U+2600 BLACK SUN WITH RAYS + g_regexTester.should_throw(R"(\u)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\u0)", 
error_escape, ECMAScript); + g_regexTester.should_throw(R"(\u00)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\u000)", error_escape, ECMAScript); + + // AtomEscape :: CharacterClassEscape + g_regexTester.should_match("0", R"(\d)", ECMAScript); + g_regexTester.should_match("5", R"(\d)", ECMAScript); + g_regexTester.should_not_match("a", R"(\d)", ECMAScript); + g_regexTester.should_not_match("_", R"(\d)", ECMAScript); + g_regexTester.should_not_match("-", R"(\d)", ECMAScript); + g_regexTester.should_not_match("\1", R"(\d)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\d)", ECMAScript); + g_regexTester.should_not_match("0", R"(\D)", ECMAScript); + g_regexTester.should_not_match("5", R"(\D)", ECMAScript); + g_regexTester.should_match("a", R"(\D)", ECMAScript); + g_regexTester.should_match("_", R"(\D)", ECMAScript); + g_regexTester.should_match("-", R"(\D)", ECMAScript); + g_regexTester.should_match("\1", R"(\D)", ECMAScript); + g_regexTester.should_match("\\", R"(\D)", ECMAScript); + + g_regexTester.should_match("a", R"(\w)", ECMAScript); + g_regexTester.should_match("A", R"(\w)", ECMAScript); + g_regexTester.should_match("0", R"(\w)", ECMAScript); + g_regexTester.should_match("_", R"(\w)", ECMAScript); + g_regexTester.should_not_match("-", R"(\w)", ECMAScript); + g_regexTester.should_not_match("\1", R"(\w)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\w)", ECMAScript); + g_regexTester.should_not_match(" ", R"(\w)", ECMAScript); + g_regexTester.should_not_match("a", R"(\W)", ECMAScript); + g_regexTester.should_not_match("A", R"(\W)", ECMAScript); + g_regexTester.should_not_match("0", R"(\W)", ECMAScript); + g_regexTester.should_not_match("_", R"(\W)", ECMAScript); + g_regexTester.should_match("-", R"(\W)", ECMAScript); + g_regexTester.should_match("\1", R"(\W)", ECMAScript); + g_regexTester.should_match("\\", R"(\W)", ECMAScript); + g_regexTester.should_match(" ", R"(\W)", ECMAScript); + + g_regexTester.should_match(" ", R"(\s)", 
ECMAScript); + g_regexTester.should_match("\t", R"(\s)", ECMAScript); + g_regexTester.should_match("\n", R"(\s)", ECMAScript); + g_regexTester.should_not_match("a", R"(\s)", ECMAScript); + g_regexTester.should_not_match("0", R"(\s)", ECMAScript); + g_regexTester.should_not_match("_", R"(\s)", ECMAScript); + g_regexTester.should_not_match("-", R"(\s)", ECMAScript); + g_regexTester.should_not_match("\1", R"(\s)", ECMAScript); + g_regexTester.should_not_match("\\", R"(\s)", ECMAScript); + g_regexTester.should_not_match(" ", R"(\S)", ECMAScript); + g_regexTester.should_not_match("\t", R"(\S)", ECMAScript); + g_regexTester.should_not_match("\n", R"(\S)", ECMAScript); + g_regexTester.should_match("a", R"(\S)", ECMAScript); + g_regexTester.should_match("0", R"(\S)", ECMAScript); + g_regexTester.should_match("_", R"(\S)", ECMAScript); + g_regexTester.should_match("-", R"(\S)", ECMAScript); + g_regexTester.should_match("\1", R"(\S)", ECMAScript); + g_regexTester.should_match("\\", R"(\S)", ECMAScript); + + // AtomEscape :: CharacterEscape :: IdentityEscape + g_regexTester.should_match("\\", R"(\\)", ECMAScript); + g_regexTester.should_not_match("g", R"(\\)", ECMAScript); + g_regexTester.should_not_match("\\\\", R"(\\)", ECMAScript); + check_atomescape_identityescape("a", ECMAScript); + check_atomescape_identityescape("-", ECMAScript); + check_atomescape_identityescape(" ", ECMAScript); + check_atomescape_identityescape("(", ECMAScript); + check_atomescape_identityescape(")", ECMAScript); + check_atomescape_identityescape("[", ECMAScript); + check_atomescape_identityescape("]", ECMAScript); + check_atomescape_identityescape("{", ECMAScript); + check_atomescape_identityescape("}", ECMAScript); + check_atomescape_identityescape("*", ECMAScript); + check_atomescape_identityescape("?", ECMAScript); + check_atomescape_identityescape("+", ECMAScript); + check_atomescape_identityescape("^", ECMAScript); + check_atomescape_identityescape("$", ECMAScript); + 
check_atomescape_identityescape(".", ECMAScript); + + { + string pattern = R"(\z)"; + basic_regex> custom_charclass_regex{pattern, ECMAScript}; + check_match("z", pattern, custom_charclass_regex); + check_no_match("\\", pattern, custom_charclass_regex); + check_no_match(" ", pattern, custom_charclass_regex); + } + { + string pattern = R"(\Z)"; + basic_regex> custom_charclass_regex{pattern, ECMAScript}; + check_match("Z", pattern, custom_charclass_regex); + check_no_match("\\", pattern, custom_charclass_regex); + check_no_match("A", pattern, custom_charclass_regex); + check_no_match(" ", pattern, custom_charclass_regex); + } +} + +void test_gh_5244_classescape_ecmascript() { + + // ClassEscape :: DecimalEscape + // only accepts literal NUL (\0), all other decimal escapes result in error + g_regexTester.should_match("\0"s, R"([\0])", ECMAScript); + g_regexTester.should_not_match("0", R"([\0])", ECMAScript); + g_regexTester.should_not_match("", R"([\0])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\0])", ECMAScript); + g_regexTester.should_throw(R"([\00])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\01])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\1])", error_escape, ECMAScript); + g_regexTester.should_throw(R"((a)[\10])", error_escape, ECMAScript); + g_regexTester.should_throw(R"((a)[\1])", error_escape, ECMAScript); + g_regexTester.should_throw(R"((a)[\01])", error_escape, ECMAScript); + + // ClassEscape :: 'b' + check_classescape_controlescape("\b", "b", ECMAScript); + + // ClassEscape :: CharacterEscape :: ControlEscape + check_classescape_controlescape("\f", "f", ECMAScript); + check_classescape_controlescape("\n", "n", ECMAScript); + check_classescape_controlescape("\r", "r", ECMAScript); + check_classescape_controlescape("\t", "t", ECMAScript); + check_classescape_controlescape("\v", "v", ECMAScript); + + // ClassEscape :: CharacterEscape :: 'c' ControlLetter + g_regexTester.should_match("\x1", R"([\cA])", 
ECMAScript); + g_regexTester.should_match("\x1a", R"([\cZ])", ECMAScript); + g_regexTester.should_match("\x1", R"([\ca])", ECMAScript); + g_regexTester.should_match("\x1a", R"([\cz])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\ca])", ECMAScript); + g_regexTester.should_not_match("c", R"([\ca])", ECMAScript); + g_regexTester.should_not_match("ca", R"([\ca])", ECMAScript); + g_regexTester.should_throw(R"([\c0])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\c])", error_escape, ECMAScript); + + // ClassEscape :: CharacterEscape :: HexEscapeSequence + g_regexTester.should_match("\x00"s, R"([\x00])", ECMAScript); + g_regexTester.should_match("\x00"s, R"([\x000])", ECMAScript); + g_regexTester.should_match("0", R"([\x000])", ECMAScript); + g_regexTester.should_match("A", R"([\x41])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\x41])", ECMAScript); + g_regexTester.should_match("\xff", R"([\xff])", ECMAScript); + g_regexTester.should_throw(R"([\x])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\x0])", error_escape, ECMAScript); + + // ClassEscape :: CharacterEscape :: UnicodeEscapeSequence + g_regexTester.should_match("\u0000"s, R"([\u0000])", ECMAScript); + g_regexTester.should_match("\u0000"s, R"([\u00000])", ECMAScript); + g_regexTester.should_match("0", R"([\u00000])", ECMAScript); + g_regexTester.should_match("A", R"([\u0041])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\u0041])", ECMAScript); + g_regexTester.should_throw(R"([\uffff])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\u2600])", error_escape, ECMAScript); // U+2600 BLACK SUN WITH RAYS + g_regexTester.should_throw(R"([\u])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\u0])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\u00])", error_escape, ECMAScript); + g_regexTester.should_throw(R"([\u000])", error_escape, ECMAScript); + + // ClassEscape :: CharacterClassEscape + 
g_regexTester.should_match("0", R"([\d])", ECMAScript); + g_regexTester.should_match("5", R"([\d])", ECMAScript); + g_regexTester.should_not_match("a", R"([\d])", ECMAScript); + g_regexTester.should_not_match("_", R"([\d])", ECMAScript); + g_regexTester.should_not_match("-", R"([\d])", ECMAScript); + g_regexTester.should_not_match("\1", R"([\d])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\d])", ECMAScript); + g_regexTester.should_not_match("0", R"([\D])", ECMAScript); + g_regexTester.should_not_match("5", R"([\D])", ECMAScript); + g_regexTester.should_match("a", R"([\D])", ECMAScript); + g_regexTester.should_match("_", R"([\D])", ECMAScript); + g_regexTester.should_match("-", R"([\D])", ECMAScript); + g_regexTester.should_match("\1", R"([\D])", ECMAScript); + g_regexTester.should_match("\\", R"([\D])", ECMAScript); + + g_regexTester.should_match("a", R"([\w])", ECMAScript); + g_regexTester.should_match("A", R"([\w])", ECMAScript); + g_regexTester.should_match("0", R"([\w])", ECMAScript); + g_regexTester.should_match("_", R"([\w])", ECMAScript); + g_regexTester.should_not_match("-", R"([\w])", ECMAScript); + g_regexTester.should_not_match("\1", R"([\w])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\w])", ECMAScript); + g_regexTester.should_not_match(" ", R"([\w])", ECMAScript); + g_regexTester.should_not_match("a", R"([\W])", ECMAScript); + g_regexTester.should_not_match("A", R"([\W])", ECMAScript); + g_regexTester.should_not_match("0", R"([\W])", ECMAScript); + g_regexTester.should_not_match("_", R"([\W])", ECMAScript); + g_regexTester.should_match("-", R"([\W])", ECMAScript); + g_regexTester.should_match("\1", R"([\W])", ECMAScript); + g_regexTester.should_match("\\", R"([\W])", ECMAScript); + g_regexTester.should_match(" ", R"([\W])", ECMAScript); + + g_regexTester.should_match(" ", R"([\s])", ECMAScript); + g_regexTester.should_match("\t", R"([\s])", ECMAScript); + g_regexTester.should_match("\n", R"([\s])", ECMAScript); + 
g_regexTester.should_not_match("a", R"([\s])", ECMAScript); + g_regexTester.should_not_match("0", R"([\s])", ECMAScript); + g_regexTester.should_not_match("_", R"([\s])", ECMAScript); + g_regexTester.should_not_match("-", R"([\s])", ECMAScript); + g_regexTester.should_not_match("\1", R"([\s])", ECMAScript); + g_regexTester.should_not_match("\\", R"([\s])", ECMAScript); + g_regexTester.should_not_match(" ", R"([\S])", ECMAScript); + g_regexTester.should_not_match("\t", R"([\S])", ECMAScript); + g_regexTester.should_not_match("\n", R"([\S])", ECMAScript); + g_regexTester.should_match("a", R"([\S])", ECMAScript); + g_regexTester.should_match("0", R"([\S])", ECMAScript); + g_regexTester.should_match("_", R"([\S])", ECMAScript); + g_regexTester.should_match("-", R"([\S])", ECMAScript); + g_regexTester.should_match("\1", R"([\S])", ECMAScript); + g_regexTester.should_match("\\", R"([\S])", ECMAScript); + + // ClassEscape :: CharacterEscape :: IdentityEscape + g_regexTester.should_match("\\", R"([\\])", ECMAScript); + g_regexTester.should_not_match("b", R"([\\])", ECMAScript); + check_classescape_identityescape("a", ECMAScript); + check_classescape_identityescape("-", ECMAScript); + check_classescape_identityescape(" ", ECMAScript); + check_classescape_identityescape("(", ECMAScript); + check_classescape_identityescape(")", ECMAScript); + check_classescape_identityescape("[", ECMAScript); + check_classescape_identityescape("]", ECMAScript); + check_classescape_identityescape("{", ECMAScript); + check_classescape_identityescape("}", ECMAScript); + check_classescape_identityescape("*", ECMAScript); + check_classescape_identityescape("?", ECMAScript); + check_classescape_identityescape("+", ECMAScript); + check_classescape_identityescape("^", ECMAScript); + check_classescape_identityescape("$", ECMAScript); + check_classescape_identityescape(".", ECMAScript); + check_classescape_identityescape("B", ECMAScript); + + { + string pattern = R"([\z])"; + basic_regex> 
custom_charclass_regex{pattern, ECMAScript}; + check_match("z", pattern, custom_charclass_regex); + check_no_match("\\", pattern, custom_charclass_regex); + check_no_match(" ", pattern, custom_charclass_regex); + } + { + string pattern = R"([\Z])"; + basic_regex> custom_charclass_regex{pattern, ECMAScript}; + check_match("Z", pattern, custom_charclass_regex); + check_no_match("\\", pattern, custom_charclass_regex); + check_no_match("A", pattern, custom_charclass_regex); + check_no_match(" ", pattern, custom_charclass_regex); + } +} + +void test_gh_5244_atomescape_posix_common(syntax_option_type option) { + // Sections on "BRE Special Characters" and "ERE Special Characters": + // When special character is preceded by backslash, the special character matches itself + g_regexTester.should_match("\\", R"(\\)", option); + g_regexTester.should_not_match("b", R"(\\)", option); + g_regexTester.should_not_match("\\\\", R"(\\)", option); + check_atomescape_identityescape(".", option); + check_atomescape_identityescape("[", option); + check_atomescape_identityescape("*", option); + check_atomescape_identityescape("^", option); + check_atomescape_identityescape("$", option); + + // Sections on "BRE Special Characters" and "ERE Special Characters": + // escaping ordinary characters is undefined -> reject + g_regexTester.should_throw(R"(\B)", error_escape, option); + g_regexTester.should_throw(R"(\c)", error_escape, option); + g_regexTester.should_throw(R"(\ca)", error_escape, option); + g_regexTester.should_throw(R"(\x000)", error_escape, option); + g_regexTester.should_throw(R"(\u0000)", error_escape, option); + g_regexTester.should_throw(R"(\d)", error_escape, option); + g_regexTester.should_throw(R"(\D)", error_escape, option); + g_regexTester.should_throw(R"(\w)", error_escape, option); + g_regexTester.should_throw(R"(\W)", error_escape, option); + g_regexTester.should_throw(R"(\s)", error_escape, option); + g_regexTester.should_throw(R"(\S)", error_escape, option); + + // 
while [ is special, ] is not + g_regexTester.should_throw(R"(\])", error_escape, option); +} + +void test_gh_5244_atomescape_posix_not_awk(syntax_option_type option) { + test_gh_5244_atomescape_posix_common(option); + + // reject awk-only escapes + g_regexTester.should_throw(R"(\a)", error_escape, option); + g_regexTester.should_throw(R"(\b)", error_escape, option); + g_regexTester.should_throw(R"(\f)", error_escape, option); + g_regexTester.should_throw(R"(\n)", error_escape, option); + g_regexTester.should_throw(R"(\t)", error_escape, option); + g_regexTester.should_throw(R"(\r)", error_escape, option); + g_regexTester.should_throw(R"(\v)", error_escape, option); + g_regexTester.should_throw(R"(\")", error_escape, option); + g_regexTester.should_throw(R"(\/)", error_escape, option); +} + +void test_gh_5244_atomescape_basic_or_grep(syntax_option_type option) { + test_gh_5244_atomescape_posix_not_awk(option); + + // Section on "BREs Matching Multiple Characters": + // \ plus digit is backreference to previously completed subexpression + g_regexTester.should_throw(R"(\0)", error_escape, option); + g_regexTester.should_throw(R"(\1)", error_backref, option); + g_regexTester.should_match("aa", R"(\(a\)\1)", option); + g_regexTester.should_throw(R"(\(a\)\0)", error_escape, option); + g_regexTester.should_throw(R"(\1\(a\))", error_backref, option); + + // check that the parser rejects escaped characters + // that are only special in extended regexes or awk + g_regexTester.should_throw(R"(\+)", error_escape, option); + g_regexTester.should_throw(R"(\?)", error_escape, option); + g_regexTester.should_throw(R"(\|)", error_escape, option); + g_regexTester.should_throw(R"(\")", error_escape, option); + g_regexTester.should_throw(R"(\/)", error_escape, option); +} + +void test_gh_5244_atomescape_extended_egrep_awk(syntax_option_type option) { + test_gh_5244_atomescape_posix_common(option); + + // check that the parser accepts escaped characters + // that are only special in 
extended regexes + check_atomescape_identityescape("+", option); + check_atomescape_identityescape("?", option); + check_atomescape_identityescape("|", option); + check_atomescape_identityescape("(", option); + check_atomescape_identityescape(")", option); + check_atomescape_identityescape("{", option); + + // Even though { is special, } is not, + // so the interpretation of the escape sequence \} is undefined + // according to the POSIX standard referenced in the C++ standard. + // But we treat \} as an identity escape in line with + // more recent versions of the POSIX standard. + check_atomescape_identityescape("}", option); +} + +void test_gh_5244_atomescape_extended_or_egrep(syntax_option_type option) { + test_gh_5244_atomescape_extended_egrep_awk(option); + test_gh_5244_atomescape_posix_not_awk(option); + + // there are no backreferences in extended regexes + g_regexTester.should_throw(R"(\0)", error_escape, option); + g_regexTester.should_throw(R"(\1)", error_escape, option); + g_regexTester.should_throw(R"((a)\1)", error_escape, option); + g_regexTester.should_throw(R"((a)\0)", error_escape, option); + g_regexTester.should_throw(R"(\1(a))", error_escape, option); +} + +void test_gh_5244_atomescape_awk() { + test_gh_5244_atomescape_extended_egrep_awk(awk); + + // awk-only escapes + check_atomescape_controlescape("\a", "a", awk); + check_atomescape_controlescape("\b", "b", awk); + check_atomescape_controlescape("\f", "f", awk); + check_atomescape_controlescape("\n", "n", awk); + check_atomescape_controlescape("\r", "r", awk); + check_atomescape_controlescape("\t", "t", awk); + check_atomescape_controlescape("\v", "v", awk); + check_atomescape_identityescape("\"", awk); + check_atomescape_identityescape("/", awk); + + // awk supports octal sequences + g_regexTester.should_match("\1", R"(\1)", awk); + g_regexTester.should_not_match("1", R"(\1)", awk); + g_regexTester.should_not_match("\\1", R"(\1)", awk); + g_regexTester.should_match("\11", R"(\11)", awk); + 
g_regexTester.should_not_match("1", R"(\11)", awk); + g_regexTester.should_not_match("11", R"(\11)", awk); + g_regexTester.should_not_match("\\11", R"(\11)", awk); + g_regexTester.should_match("\111", R"(\111)", awk); + g_regexTester.should_not_match("1", R"(\111)", awk); + g_regexTester.should_not_match("11", R"(\111)", awk); + g_regexTester.should_not_match("111", R"(\111)", awk); + g_regexTester.should_not_match("\\111", R"(\111)", awk); + // clang-format off + g_regexTester.should_match("\111""1", R"(\1111)", awk); + // clang-format on + g_regexTester.should_not_match("\111", R"(\1111)", awk); + g_regexTester.should_not_match("1", R"(\1111)", awk); + g_regexTester.should_not_match("11", R"(\1111)", awk); + g_regexTester.should_not_match("111", R"(\1111)", awk); + g_regexTester.should_not_match("1111", R"(\1111)", awk); + g_regexTester.should_not_match("\\1111", R"(\1111)", awk); + g_regexTester.should_match("A", R"(\101)", awk); + g_regexTester.should_match("Aa", R"(\101a)", awk); + g_regexTester.should_match("\33", R"(\033)", awk); + g_regexTester.should_match("\33a", R"(\033a)", awk); + g_regexTester.should_match("\33", R"(\33)", awk); + g_regexTester.should_match("\33a", R"(\33a)", awk); + g_regexTester.should_match("\1", R"(\001)", awk); + g_regexTester.should_match("\1a", R"(\001a)", awk); + g_regexTester.should_match("\1", R"(\01)", awk); + g_regexTester.should_match("\1a", R"(\01a)", awk); + g_regexTester.should_match("\1", R"(\1)", awk); + g_regexTester.should_match("\1a", R"(\1a)", awk); + g_regexTester.should_throw(R"(\8)", error_escape, awk); + // clang-format off + g_regexTester.should_match("\1""8", R"(\18)", awk); + g_regexTester.should_match("\12""9", R"(\129)", awk); + // clang-format on + + // octal sequences evaluating to 0 are considered undefined by the standard + g_regexTester.should_throw(R"(\0)", error_escape, awk); + g_regexTester.should_throw(R"(\00)", error_escape, awk); + g_regexTester.should_throw(R"(\000)", error_escape, awk); +} + 
+void test_gh_5244_classescape_posix_not_awk(syntax_option_type option) { + // Sections "BRE Bracket Expressions" and "ERE Bracket Expressions": + // Backslash shall lose its special meaning, so \c should always match \ + c + + // common special characters outside character classes + check_classescape_noescape(".", option); + check_classescape_noescape("[", option); + check_classescape_noescape("*", option); + check_classescape_noescape("^", option); + check_classescape_noescape("$", option); + check_classescape_noescape("B", option); + check_classescape_noescape("c", option); + + // special characters outside character classes in extended regexes + check_classescape_noescape("+", option); + check_classescape_noescape("?", option); + check_classescape_noescape("|", option); + + // TRANSITION, GH-5379 + if (option & (extended | egrep | awk)) { + check_classescape_noescape("(", option); + check_classescape_noescape(")", option); + check_classescape_noescape("{", option); + } + + // closing characters that are not considered special + g_regexTester.should_match("\\]", R"([\]])", option); + g_regexTester.should_not_match("]", R"([\]])", option); + g_regexTester.should_not_match("\\", R"([\]])", option); + + // TRANSITION, GH-5379 + if (option & (extended | egrep | awk)) { + check_classescape_noescape("}", option); + } + + // awk escape sequences + check_classescape_noescape("a", option); + check_classescape_noescape("b", option); + check_classescape_noescape("f", option); + check_classescape_noescape("n", option); + check_classescape_noescape("r", option); + check_classescape_noescape("t", option); + check_classescape_noescape("v", option); + check_classescape_noescape("\"", option); + check_classescape_noescape("/", option); + + // awk octal sequences + check_classescape_noescape("0", option); + check_classescape_noescape("1", option); + g_regexTester.should_match("1", R"([\101])", option); + g_regexTester.should_match("0", R"([\101])", option); + 
g_regexTester.should_match("\\", R"([\101])", option); + g_regexTester.should_match("1", R"([\101a])", option); + g_regexTester.should_match("0", R"([\101a])", option); + g_regexTester.should_match("\\", R"([\101a])", option); + g_regexTester.should_match("a", R"([\101a])", option); + g_regexTester.should_match("3", R"([\033a])", option); + g_regexTester.should_match("0", R"([\033a])", option); + g_regexTester.should_match("\\", R"([\033a])", option); + g_regexTester.should_match("a", R"([\033a])", option); + g_regexTester.should_match("3", R"([\33a])", option); + g_regexTester.should_match("\\", R"([\33a])", option); + g_regexTester.should_match("a", R"([\33a])", option); + g_regexTester.should_match("1", R"([\001a])", option); + g_regexTester.should_match("0", R"([\001a])", option); + g_regexTester.should_match("\\", R"([\001a])", option); + g_regexTester.should_match("a", R"([\001a])", option); + + // ECMAScript escape sequences + g_regexTester.should_match("c", R"([\ca])", option); + g_regexTester.should_match("a", R"([\ca])", option); + g_regexTester.should_match("\\", R"([\ca])", option); + g_regexTester.should_match("x", R"([\x000])", option); + g_regexTester.should_match("0", R"([\x000])", option); + g_regexTester.should_match("\\", R"([\x000])", option); + g_regexTester.should_match("u", R"([\u0000])", option); + g_regexTester.should_match("0", R"([\u0000])", option); + g_regexTester.should_match("\\", R"([\u0000])", option); + check_classescape_noescape("d", option); + check_classescape_noescape("D", option); + check_classescape_noescape("w", option); + check_classescape_noescape("W", option); + check_classescape_noescape("s", option); + check_classescape_noescape("S", option); +} + +void test_gh_5244_classescape_basic_or_grep(syntax_option_type option) { + test_gh_5244_classescape_posix_not_awk(option); + + // check no backreference handling + g_regexTester.should_match("a1", R"(\(a\)[\1])", option); + g_regexTester.should_match("a\\", R"(\(a\)[\1])", 
option); + g_regexTester.should_not_match("aa", R"(\(a\)[\1])", option); + g_regexTester.should_match("a0", R"(\(a\)[\0])", option); + g_regexTester.should_match("a\\", R"(\(a\)[\0])", option); + g_regexTester.should_match("1a", R"([\1]\(a\))", option); + g_regexTester.should_match("\\a", R"([\1]\(a\))", option); + g_regexTester.should_not_match("aa", R"([\1]\(a\))", option); +} + +void test_gh_5244_classescape_extended_or_egrep(syntax_option_type option) { + test_gh_5244_classescape_posix_not_awk(option); + + // check no backreference handling + g_regexTester.should_match("a1", R"((a)[\1])", option); + g_regexTester.should_match("a\\", R"((a)[\1])", option); + g_regexTester.should_not_match("aa", R"((a)[\1])", option); + g_regexTester.should_match("a0", R"((a)[\0])", option); + g_regexTester.should_match("a\\", R"((a)[\0])", option); + g_regexTester.should_match("1a", R"([\1](a))", option); + g_regexTester.should_match("\\a", R"([\1](a))", option); + g_regexTester.should_not_match("aa", R"([\1](a))", option); +} + +void test_gh_5244_classescape_awk() { + // awk-only sequences and backslash + g_regexTester.should_match("\\", "[\\\\]", awk); + g_regexTester.should_not_match("g", "[\\\\]", awk); + check_classescape_controlescape("\a", "a", awk); + check_classescape_controlescape("\b", "b", awk); + check_classescape_controlescape("\f", "f", awk); + check_classescape_controlescape("\n", "n", awk); + check_classescape_controlescape("\r", "r", awk); + check_classescape_controlescape("\t", "t", awk); + check_classescape_controlescape("\v", "v", awk); + check_classescape_identityescape("\"", awk); + check_classescape_identityescape("/", awk); + + // awk supports octal sequences + g_regexTester.should_match("\1", R"([\1])", awk); + g_regexTester.should_not_match("1", R"([\1])", awk); + g_regexTester.should_not_match("\\", R"([\1])", awk); + g_regexTester.should_match("\11", R"([\11])", awk); + g_regexTester.should_not_match("1", R"([\11])", awk); + 
g_regexTester.should_not_match("\\", R"([\11])", awk); + g_regexTester.should_match("\111", R"([\111])", awk); + g_regexTester.should_not_match("0", R"([\111])", awk); + g_regexTester.should_not_match("\\", R"([\111])", awk); + g_regexTester.should_match("\111", R"([\1111])", awk); + g_regexTester.should_match("1", R"([\1111])", awk); + g_regexTester.should_not_match("\\", R"([\1111])", awk); + g_regexTester.should_match("A", R"([\101])", awk); + g_regexTester.should_not_match("\\", R"([\101])", awk); + g_regexTester.should_not_match("1", R"([\101])", awk); + g_regexTester.should_not_match("0", R"([\101])", awk); + g_regexTester.should_not_match("\1", R"([\101])", awk); + g_regexTester.should_not_match("\0", R"([\101])", awk); + g_regexTester.should_match("A", R"([\101a])", awk); + g_regexTester.should_match("a", R"([\101a])", awk); + g_regexTester.should_not_match("\\", R"([\101a])", awk); + g_regexTester.should_not_match("1", R"([\101a])", awk); + g_regexTester.should_not_match("0", R"([\101a])", awk); + g_regexTester.should_not_match("\1", R"([\101a])", awk); + g_regexTester.should_not_match("\0", R"([\101a])", awk); + g_regexTester.should_match("\33", R"([\033])", awk); + g_regexTester.should_not_match("\\", R"([\033])", awk); + g_regexTester.should_not_match("3", R"([\033])", awk); + g_regexTester.should_not_match("0", R"([\033])", awk); + g_regexTester.should_not_match("\3", R"([\033])", awk); + g_regexTester.should_not_match("\0", R"([\033])", awk); + g_regexTester.should_match("\33", R"([\033a])", awk); + g_regexTester.should_match("a", R"([\033a])", awk); + g_regexTester.should_not_match("\\", R"([\033a])", awk); + g_regexTester.should_not_match("3", R"([\033a])", awk); + g_regexTester.should_not_match("0", R"([\033a])", awk); + g_regexTester.should_not_match("\3", R"([\033a])", awk); + g_regexTester.should_not_match("\0", R"([\033a])", awk); + g_regexTester.should_match("\33", R"([\33])", awk); + g_regexTester.should_not_match("\\", R"([\33])", awk); + 
g_regexTester.should_not_match("3", R"([\33])", awk); + g_regexTester.should_not_match("\3", R"([\33])", awk); + g_regexTester.should_match("\33", R"([\33a])", awk); + g_regexTester.should_match("a", R"([\33a])", awk); + g_regexTester.should_not_match("\\", R"([\33a])", awk); + g_regexTester.should_not_match("3", R"([\33a])", awk); + g_regexTester.should_not_match("\3", R"([\33a])", awk); + g_regexTester.should_match("\1", R"([\001])", awk); + g_regexTester.should_not_match("\\", R"([\001])", awk); + g_regexTester.should_not_match("1", R"([\001])", awk); + g_regexTester.should_not_match("0", R"([\001])", awk); + g_regexTester.should_not_match("\0", R"([\001])", awk); + g_regexTester.should_match("\1", R"([\001a])", awk); + g_regexTester.should_match("a", R"([\001a])", awk); + g_regexTester.should_not_match("\\", R"([\001a])", awk); + g_regexTester.should_not_match("1", R"([\001a])", awk); + g_regexTester.should_not_match("0", R"([\001a])", awk); + g_regexTester.should_not_match("\0", R"([\001a])", awk); + g_regexTester.should_match("\1", R"([\01])", awk); + g_regexTester.should_not_match("\\", R"([\01])", awk); + g_regexTester.should_not_match("1", R"([\01])", awk); + g_regexTester.should_not_match("0", R"([\01])", awk); + g_regexTester.should_not_match("\0", R"([\01])", awk); + g_regexTester.should_match("\1", R"([\01a])", awk); + g_regexTester.should_match("a", R"([\01a])", awk); + g_regexTester.should_not_match("\\", R"([\01a])", awk); + g_regexTester.should_not_match("1", R"([\01a])", awk); + g_regexTester.should_not_match("0", R"([\01a])", awk); + g_regexTester.should_not_match("\0", R"([\01a])", awk); + g_regexTester.should_match("\1", R"([\1])", awk); + g_regexTester.should_not_match("\\", R"([\1])", awk); + g_regexTester.should_not_match("1", R"([\1])", awk); + g_regexTester.should_match("\1", R"([\1a])", awk); + g_regexTester.should_match("a", R"([\1a])", awk); + g_regexTester.should_not_match("\\", R"([\1a])", awk); + g_regexTester.should_not_match("1", 
R"([\1a])", awk); + g_regexTester.should_throw(R"([\8])", error_escape, awk); + g_regexTester.should_match("\1", R"([\18])", awk); + g_regexTester.should_match("8", R"([\18])", awk); + g_regexTester.should_not_match("\\", R"([\18])", awk); + g_regexTester.should_not_match("1", R"([\18])", awk); + g_regexTester.should_match("\12", R"([\129])", awk); + g_regexTester.should_match("9", R"([\129])", awk); + g_regexTester.should_not_match("\\", R"([\129])", awk); + g_regexTester.should_not_match("1", R"([\129])", awk); + g_regexTester.should_not_match("2", R"([\129])", awk); + g_regexTester.should_not_match("\131", R"([\129])", awk); + + // octal sequences evaluating to 0 are considered undefined by the standard + g_regexTester.should_throw(R"([\0])", error_escape, awk); + g_regexTester.should_throw(R"([\00])", error_escape, awk); + g_regexTester.should_throw(R"([\000])", error_escape, awk); + + // all other escapes (including the atom escapes of basic and extended regexes) are undefined + g_regexTester.should_throw(R"([\.])", error_escape, awk); + g_regexTester.should_throw(R"([\[])", error_escape, awk); + g_regexTester.should_throw(R"([\*])", error_escape, awk); + g_regexTester.should_throw(R"([\^])", error_escape, awk); + g_regexTester.should_throw(R"([\$])", error_escape, awk); + g_regexTester.should_throw(R"([\]])", error_escape, awk); + g_regexTester.should_throw(R"([\+])", error_escape, awk); + g_regexTester.should_throw(R"([\?])", error_escape, awk); + g_regexTester.should_throw(R"([\|])", error_escape, awk); + g_regexTester.should_throw(R"([\(])", error_escape, awk); + g_regexTester.should_throw(R"([\)])", error_escape, awk); + g_regexTester.should_throw(R"([\{])", error_escape, awk); + g_regexTester.should_throw(R"([\}])", error_escape, awk); + g_regexTester.should_throw(R"([\B])", error_escape, awk); + g_regexTester.should_throw(R"([\c])", error_escape, awk); + g_regexTester.should_throw(R"([\ca])", error_escape, awk); + 
g_regexTester.should_throw(R"([\x000])", error_escape, awk); + g_regexTester.should_throw(R"([\u0000])", error_escape, awk); + g_regexTester.should_throw(R"([\d])", error_escape, awk); + g_regexTester.should_throw(R"([\D])", error_escape, awk); + g_regexTester.should_throw(R"([\w])", error_escape, awk); + g_regexTester.should_throw(R"([\W])", error_escape, awk); + g_regexTester.should_throw(R"([\s])", error_escape, awk); + g_regexTester.should_throw(R"([\S])", error_escape, awk); +} + +void test_gh_5244() { + test_gh_5244_atomescape_ecmascript(); + test_gh_5244_classescape_ecmascript(); + + for (syntax_option_type basic_or_grep : {basic, grep}) { + test_gh_5244_atomescape_basic_or_grep(basic_or_grep); + test_gh_5244_classescape_basic_or_grep(basic_or_grep); + } + + for (syntax_option_type extended_or_egrep : {extended, egrep}) { + test_gh_5244_atomescape_extended_or_egrep(extended_or_egrep); + test_gh_5244_classescape_extended_or_egrep(extended_or_egrep); + } + + test_gh_5244_atomescape_awk(); + test_gh_5244_classescape_awk(); +} + +int main() { + test_gh_5244(); + + return g_regexTester.result(); +} diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp index b720710ff9f..655c22417de 100644 --- a/tests/tr1/tests/regex2/test.cpp +++ b/tests/tr1/tests/regex2/test.cpp @@ -460,8 +460,7 @@ static const regex_test tests[] = { {__LINE__, T("[\\a]"), T("\\"), "1 0 1", BASIC | GREP | EXTENDED | EGREP}, {__LINE__, T("[\\a]"), T("a"), "1 0 1", ECMA | BASIC | GREP | EXTENDED | EGREP}, - {__LINE__, T("[\\b]"), T("\b"), "1 0 1", AWK}, - {__LINE__, T("[\\b]"), T(""), "0", ECMA}, + {__LINE__, T("[\\b]"), T("\b"), "1 0 1", AWK | ECMA}, {__LINE__, T("[\\b]"), T("\\"), "1 0 1", BASIC | GREP | EXTENDED | EGREP}, {__LINE__, T("[\\b]"), T("b"), "1 0 1", BASIC | GREP | EXTENDED | EGREP}, @@ -487,45 +486,63 @@ static const regex_test tests[] = { {__LINE__, T("[\\ca]"), T("\x01"), "1 0 1", ECMA}, {__LINE__, T("[\\c3]"), T("\\"), "1 0 1", BASIC | EXTENDED | GREP | 
EGREP}, - {__LINE__, T("[\\c3]"), T("c"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\c3]"), T("3"), "1 0 1", NOT_ECMA}, + {__LINE__, T("[\\c3]"), T("c"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\c3]"), T("3"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\c3]"), T("c"), "-1", AWK}, {__LINE__, T("[\\x1b]"), T("\x1b"), "1 0 1", ECMA}, {__LINE__, T("[\\x1b]"), T("\\"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, - {__LINE__, T("[\\x1b]"), T("x"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\x1b]"), T("1"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\x1b]"), T("b"), "1 0 1", NOT_ECMA}, + {__LINE__, T("[\\x1b]"), T("x"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\x1b]"), T("1"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\x1b]"), T("b"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\x1b]"), T("b"), "-1", AWK}, #ifdef WIDE {__LINE__, T("[\\u12cd]"), T("\u12cd"), "1 0 1", ECMA}, #else // defined WIDE {__LINE__, T("[\\u12cd]"), T(""), "-1", ECMA}, #endif // defined WIDE - {__LINE__, T("[\\u12cd]"), T("\\"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\u12cd]"), T("u"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\u12cd]"), T("1"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\u12cd]"), T("2"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\u12cd]"), T("c"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\u12cd]"), T("d"), "1 0 1", NOT_ECMA}, + {__LINE__, T("[\\u12cd]"), T("\\"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("u"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("1"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("2"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("c"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("d"), "1 0 1", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\u12cd]"), T("d"), "-1", AWK}, {__LINE__, T("[\\\\]"), T("\\"), "1 0 1", ALL}, - {__LINE__, T("[\\^]"), 
T("^"), "1 0 1", ALL}, - {__LINE__, T("[\\(]"), T("("), "1 0 1", ALL}, - {__LINE__, T("[\\)]"), T(")"), "1 0 1", ALL}, - {__LINE__, T("[\\{]"), T("{"), "1 0 1", ALL}, - {__LINE__, T("[\\}]"), T("}"), "1 0 1", ALL}, - {__LINE__, T("[\\[]"), T("["), "1 0 1", ALL}, + {__LINE__, T("[\\^]"), T("^"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\^]"), T("^"), "-1", AWK}, + {__LINE__, T("[\\(]"), T("("), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\(]"), T("("), "-1", AWK}, + {__LINE__, T("[\\)]"), T(")"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\)]"), T(")"), "-1", AWK}, + {__LINE__, T("[\\{]"), T("{"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\{]"), T("{"), "-1", AWK}, + {__LINE__, T("[\\}]"), T("}"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\}]"), T("}"), "-1", AWK}, + {__LINE__, T("[\\[]"), T("["), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\[]"), T("["), "-1", AWK}, {__LINE__, T("[\\]]"), T("]"), "1 0 1", ECMA}, - {__LINE__, T("[\\+]"), T("+"), "1 0 1", ALL}, - {__LINE__, T("[\\*]"), T("*"), "1 0 1", ALL}, - {__LINE__, T("[\\?]"), T("?"), "1 0 1", ALL}, + {__LINE__, T("[\\+]"), T("+"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\+]"), T("+"), "-1", AWK}, + {__LINE__, T("[\\*]"), T("*"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\*]"), T("*"), "-1", AWK}, + {__LINE__, T("[\\?]"), T("?"), "1 0 1", NOT_AWK}, + {__LINE__, T("[\\?]"), T("?"), "-1", AWK}, // character class escape - {__LINE__, T("[\\d]"), T("1"), "0", NOT_ECMA}, - {__LINE__, T("[\\D]"), T("a"), "0", NOT_ECMA}, - {__LINE__, T("[\\s]"), T(" "), "0", NOT_ECMA}, - {__LINE__, T("[\\S]"), T("a"), "0", NOT_ECMA}, - {__LINE__, T("[\\w]"), T("a"), "0", NOT_ECMA}, - {__LINE__, T("[\\W]"), T(" "), "0", NOT_ECMA}, + {__LINE__, T("[\\d]"), T("1"), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\d]"), T("1"), "-1", AWK}, + {__LINE__, T("[\\D]"), T("a"), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\D]"), T("a"), "-1", AWK}, + {__LINE__, T("[\\s]"), T(" "), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\s]"), T(" "), "-1", AWK}, + 
{__LINE__, T("[\\S]"), T("a"), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\S]"), T("a"), "-1", AWK}, + {__LINE__, T("[\\w]"), T("a"), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\w]"), T("a"), "-1", AWK}, + {__LINE__, T("[\\W]"), T(" "), "0", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\W]"), T(" "), "-1", AWK}, // named character classes {__LINE__, T("[[:alnum:]]"), T("b"), "1 0 1", ALL}, From a49844cf5cac0a6fd56853b31c18125005bfd65a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sun, 30 Mar 2025 15:22:50 +0200 Subject: [PATCH 02/14] add two missing member functions to custom regex traits in test --- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 133444214f8..f26cc6ed5ec 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -70,6 +70,14 @@ class test_regex_traits { return inner.value(ch, radix); } + locale_type imbue(locale_type l) { + return inner.imbue(l); + } + + locale_type getloc() const { + return inner.getloc(); + } + private: regex_traits inner; }; From e51ea02fc74ecb4a95b49aee18d2508bf54398ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Mon, 31 Mar 2025 19:52:25 +0200 Subject: [PATCH 03/14] Avoid parsing first decimal digit twice in _Parser::_AtomEscape() --- stl/inc/regex | 58 +++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index 4968917f9c2..a2dee47093a 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1727,8 +1727,8 @@ private: void _Expect(_Meta_type, regex_constants::error_type); // parsing - int _Do_digits(int _Base, int _Count, regex_constants::error_type _Error_type); - bool 
_DecimalDigits2(regex_constants::error_type _Error_type, int _Count = INT_MAX); + int _Do_digits(int _Base, int _Initial, int _Count, regex_constants::error_type _Error_type); + bool _DecimalDigits3(regex_constants::error_type _Error_type, int _Initial = 0); void _HexDigits(int); bool _OctalDigits(); void _Do_ex_class(_Meta_type); @@ -3993,9 +3993,9 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants: template int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits( - int _Base, int _Count, regex_constants::error_type _Error_type) { // translate digits to numeric value + int _Base, int _Initial, int _Count, regex_constants::error_type _Error_type) { // translate digits to numeric value int _Chv; - _Val = 0; + _Val = _Initial; while (_Count != 0 && (_Chv = _Traits.value(_Char, _Base)) != -1) { // append next digit if (_Val > (INT_MAX - _Chv) / _Base) { _Error(_Error_type); @@ -4009,21 +4009,21 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits( } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits2( - const regex_constants::error_type _Error_type, const int _Count /* = INT_MAX */) { // check for decimal value - return _Do_digits(10, _Count, _Error_type) != _Count; +bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits3( + const regex_constants::error_type _Error_type, const int _Initial /* = 0 */) { // check for decimal value + return _Do_digits(10, _Initial, INT_MAX, _Error_type) != INT_MAX; } template void _Parser<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits - if (_Do_digits(16, _Count, regex_constants::error_escape) != 0) { + if (_Do_digits(16, 0, _Count, regex_constants::error_escape) != 0) { _Error(regex_constants::error_escape); } } template bool _Parser<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits - return _Do_digits(8, 3, regex_constants::error_escape) != 3; + return _Do_digits(8, 0, 3, regex_constants::error_escape) != 3; } template @@ -4406,23 
+4406,27 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_clas template void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape - constexpr int _Bre_max_backref_digits = 1; - if ((_L_flags & _L_bzr_chr) && _Traits.value(_Char, 10) == 0) { // handle \0 + if ((_L_flags & (_L_bzr_chr | _L_bckr)) && (_Val = _Traits.value(_Char, 10)) != -1) { // escaped decimal sequence _Next(); - if (_Traits.value(_Char, 10) != -1) { - _Error(regex_constants::error_escape); - } - _Nfa._Add_char2(_Elem{}); - } else if ((_L_flags & _L_bckr) - && _DecimalDigits2(regex_constants::error_backref, - (_L_flags & _L_lim_bckr) ? _Bre_max_backref_digits : INT_MAX)) { // check for valid back reference - if (_Val == 0) { - _Error(regex_constants::error_escape); - } else if (_Grp_idx < static_cast(_Val) || _Finished_grps.size() <= static_cast(_Val) - || !_Finished_grps[static_cast(_Val)]) { - _Error(regex_constants::error_backref); - } else { - _Nfa._Add_backreference(static_cast(_Val)); + if ((_L_flags & _L_bzr_chr) && _Val == 0) { // handle \0 + if (_Traits.value(_Char, 10) != -1) { + _Error(regex_constants::error_escape); + } + _Nfa._Add_char2(_Elem{}); + } else if (_L_flags & _L_bckr) { // check for valid backreference + + if (!(_L_flags & _L_lim_bckr)) { + (void) _DecimalDigits3(regex_constants::error_backref, _Val); + } + + if (_Val == 0) { + _Error(regex_constants::error_escape); + } else if (_Grp_idx < static_cast(_Val) || _Finished_grps.size() <= static_cast(_Val) + || !_Finished_grps[static_cast(_Val)]) { + _Error(regex_constants::error_backref); + } else { + _Nfa._Add_backreference(static_cast(_Val)); + } } } else if (_CharacterEscape(false)) { _Nfa._Add_char2(static_cast<_Elem>(_Val)); @@ -4442,7 +4446,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier _Max = 1; } else if (_Mchar == _Meta_lbr) { // check for valid bracketed value _Next(); - if (!_DecimalDigits2(regex_constants::error_badbrace)) 
{ + if (!_DecimalDigits3(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } @@ -4452,7 +4456,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } else { // check for decimal constant following comma _Next(); if (_Mchar != _Meta_rbr) { - if (!_DecimalDigits2(regex_constants::error_badbrace)) { + if (!_DecimalDigits3(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } From c6508bb97b1a51ccf5cc62f0cb7dde0c2d241c22 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 00:50:39 -0700 Subject: [PATCH 04/14] Style nitpicks. --- stl/inc/regex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index a2dee47093a..ac626500b1e 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -4275,8 +4275,9 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunct } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape( - bool _In_character_class) const { // check for valid identity escape +bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_class) const { + // check for valid identity escape + if (_L_flags & _L_ident_ECMA) { // ECMAScript identity escape characters switch (_Char) { @@ -4323,8 +4324,8 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape( } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape( - bool _In_character_class) { // check whether an escape is valid, and process it if so +bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class) { + // check whether an escape is valid, and process it if so if (_IsIdentityEscape(_In_character_class)) { _Val = _Char; _Next(); @@ -4414,7 +4415,6 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom } _Nfa._Add_char2(_Elem{}); } else if (_L_flags & _L_bckr) { // check for valid backreference - if (!(_L_flags & _L_lim_bckr)) { (void) 
_DecimalDigits3(regex_constants::error_backref, _Val); } From bd8c6675b74759412f6b18f3bc78de06d476e471 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 02:05:07 -0700 Subject: [PATCH 05/14] Introduce `_L_brk_bal` for clarity. --- stl/inc/regex | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index ac626500b1e..9f335caf48e 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1795,7 +1795,8 @@ enum _Lang_flags { // describe language properties _L_anch_rstr = 0x02000000, // anchor restricted to beginning/end _L_star_beg = 0x04000000, // star okay at beginning of RE/expr (BRE) _L_empty_grp = 0x08000000, // empty group allowed (ERE prohibits "()") - _L_paren_bal = 0x10000000, // ')'/'}'/']' special only after '('/'{'/']' + _L_paren_bal = 0x10000000, // ')'/'}' special only after '('/'{' + _L_brk_bal = 0x20000000, // ']' special only after '[' (ERE, BRE); TRANSITION, ABI: same value as _L_brk_rstr _L_brk_rstr = 0x20000000, // ']' not special when first character in set _L_mtch_long = 0x40000000, // find longest match (ERE, BRE) }; @@ -4533,7 +4534,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte _Error(regex_constants::error_badrepeat); } else if (_Mchar == _Meta_rbr && !(_L_flags & _L_paren_bal)) { _Error(regex_constants::error_brace); - } else if (_Mchar == _Meta_rsq && !(_L_flags & (_L_paren_bal | _L_brk_rstr))) { + } else if (_Mchar == _Meta_rsq && !(_L_flags & _L_brk_bal)) { _Error(regex_constants::error_brack); } else { // add character _Nfa._Add_char2(_Char); @@ -4667,12 +4668,12 @@ _Parser<_FwdIt, _Elem, _RxTraits>::_Parser( | _L_empty_grp; constexpr unsigned int _Basic_flags = - _L_bckr | _L_lim_bckr | _L_anch_rstr | _L_star_beg | _L_empty_grp | _L_brk_rstr | _L_mtch_long; + _L_bckr | _L_lim_bckr | _L_anch_rstr | _L_star_beg | _L_empty_grp | _L_brk_bal | _L_brk_rstr | _L_mtch_long; constexpr unsigned int _Grep_flags = _Basic_flags | _L_alt_nl | 
_L_no_nl; - constexpr unsigned int _Extended_flags = - _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_ident_ERE | _L_paren_bal | _L_brk_rstr | _L_mtch_long; + constexpr unsigned int _Extended_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_ident_ERE + | _L_paren_bal | _L_brk_bal | _L_brk_rstr | _L_mtch_long; constexpr unsigned int _Awk_flags = _Extended_flags | _L_esc_oct | _L_esc_ffn | _L_esc_ffnx | _L_ident_awk; From d14c10aac0ad91a9e42da4b219bd54ddef6a86e0 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 02:45:57 -0700 Subject: [PATCH 06/14] Expand test coverage for `"[\\]]"` to all grammars. --- tests/tr1/tests/regex2/test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp index 655c22417de..630b96463ac 100644 --- a/tests/tr1/tests/regex2/test.cpp +++ b/tests/tr1/tests/regex2/test.cpp @@ -434,7 +434,6 @@ static const regex_test tests[] = { {__LINE__, T("[]"), T(""), "-1", NOT_ECMA}, {__LINE__, T("[]]"), T("]"), "-1", ECMA}, {__LINE__, T("[]]"), T("]"), "1 0 1", NOT_ECMA}, - {__LINE__, T("[\\]]"), T("]"), "1 0 1", ECMA}, {__LINE__, T("[^]"), T("^"), "1 0 1", ECMA}, {__LINE__, T("[^]]"), T("^"), "1 0 1", NOT_ECMA}, {__LINE__, T("[^]]"), T("]"), "0", NOT_ECMA}, @@ -523,6 +522,8 @@ static const regex_test tests[] = { {__LINE__, T("[\\[]"), T("["), "1 0 1", NOT_AWK}, {__LINE__, T("[\\[]"), T("["), "-1", AWK}, {__LINE__, T("[\\]]"), T("]"), "1 0 1", ECMA}, + {__LINE__, T("[\\]]"), T("\\]"), "1 0 2", BASIC | EXTENDED | GREP | EGREP}, + {__LINE__, T("[\\]]"), T("]"), "-1", AWK}, {__LINE__, T("[\\+]"), T("+"), "1 0 1", NOT_AWK}, {__LINE__, T("[\\+]"), T("+"), "-1", AWK}, {__LINE__, T("[\\*]"), T("*"), "1 0 1", NOT_AWK}, From 7fb589be4f483a69864f185dcf074f24e648a627 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Wed, 9 Apr 2025 03:00:11 -0700 Subject: [PATCH 07/14] Use std::string UDL concatenation to avoid suppressing clang-format. --- .../GH_005244_regex_escape_sequences/test.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index f26cc6ed5ec..b48c30e46cf 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -190,9 +190,7 @@ void test_gh_5244_atomescape_ecmascript() { // AtomEscape :: CharacterEscape :: HexEscapeSequence g_regexTester.should_match("\x00"s, R"(\x00)", ECMAScript); g_regexTester.should_not_match("\\x00", R"(\x00)", ECMAScript); - // clang-format off - g_regexTester.should_match("\x00""0"s, R"(\x000)", ECMAScript); - // clang-format on + g_regexTester.should_match("\x00"s + "0", R"(\x000)", ECMAScript); g_regexTester.should_match("A", R"(\x41)", ECMAScript); g_regexTester.should_not_match("\\", R"(\x41)", ECMAScript); g_regexTester.should_match("\xff", R"(\xff)", ECMAScript); @@ -202,9 +200,7 @@ void test_gh_5244_atomescape_ecmascript() { // AtomEscape :: CharacterEscape :: UnicodeEscapeSequence g_regexTester.should_match("\u0000"s, R"(\u0000)", ECMAScript); g_regexTester.should_not_match("\\u0000", R"(\u0000)", ECMAScript); - // clang-format off - g_regexTester.should_match("\u0000""0"s, R"(\u00000)", ECMAScript); - // clang-format on + g_regexTester.should_match("\u0000"s + "0", R"(\u00000)", ECMAScript); g_regexTester.should_match("A", R"(\u0041)", ECMAScript); g_regexTester.should_not_match("\\", R"(\u0041)", ECMAScript); g_regexTester.should_throw(R"(\uffff)", error_escape, ECMAScript); @@ -575,9 +571,7 @@ void test_gh_5244_atomescape_awk() { g_regexTester.should_not_match("11", R"(\111)", awk); g_regexTester.should_not_match("111", R"(\111)", awk); g_regexTester.should_not_match("\\111", R"(\111)", 
awk); - // clang-format off - g_regexTester.should_match("\111""1", R"(\1111)", awk); - // clang-format on + g_regexTester.should_match("\111"s + "1", R"(\1111)", awk); g_regexTester.should_not_match("\111", R"(\1111)", awk); g_regexTester.should_not_match("1", R"(\1111)", awk); g_regexTester.should_not_match("11", R"(\1111)", awk); @@ -597,10 +591,8 @@ void test_gh_5244_atomescape_awk() { g_regexTester.should_match("\1", R"(\1)", awk); g_regexTester.should_match("\1a", R"(\1a)", awk); g_regexTester.should_throw(R"(\8)", error_escape, awk); - // clang-format off - g_regexTester.should_match("\1""8", R"(\18)", awk); - g_regexTester.should_match("\12""9", R"(\129)", awk); - // clang-format on + g_regexTester.should_match("\1"s + "8", R"(\18)", awk); + g_regexTester.should_match("\12"s + "9", R"(\129)", awk); // octal sequences evaluating to 0 are considered undefined by the standard g_regexTester.should_throw(R"(\0)", error_escape, awk); From ee7e9ae1b5ee64c59c56b630d4ea90ffb65e9307 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 03:08:01 -0700 Subject: [PATCH 08/14] Include more headers. --- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index b48c30e46cf..8b37996a7ba 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include +#include #include #include From c2b5659d0dd95e987ad56763c0c50d2e5e780a95 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 03:09:28 -0700 Subject: [PATCH 09/14] Capture the bug title in a comment. 
--- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 8b37996a7ba..aae1c901ef6 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -852,6 +852,7 @@ void test_gh_5244_classescape_awk() { } void test_gh_5244() { + // GH-5244: <regex>: Some escape sequences are mishandled test_gh_5244_atomescape_ecmascript(); test_gh_5244_classescape_ecmascript(); From 0227314bac66bb017d9018709cdad6205a8b32e2 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 03:13:02 -0700 Subject: [PATCH 10/14] Take `const string&`. --- .../tests/GH_005244_regex_escape_sequences/test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index aae1c901ef6..96f79b19d37 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -103,7 +103,7 @@ void check_no_match(const string& subject, const string& pattern, const Rx& re, check_match(subject, pattern, re, flags, false); } -void check_atomescape_controlescape(string expected, string c, syntax_option_type option) { +void check_atomescape_controlescape(const string& expected, const string& c, syntax_option_type option) { string pattern = "\\" + c; g_regexTester.should_match(expected, pattern, option); g_regexTester.should_not_match("g", pattern, option); @@ -112,7 +112,7 @@ void check_atomescape_controlescape(string expected, string c, syntax_option_typ g_regexTester.should_not_match(pattern, pattern, option); } -void check_classescape_controlescape(string expected, string c, syntax_option_type option) { +void check_classescape_controlescape(const string& 
expected, const string& c, syntax_option_type option) { string pattern = "[\\" + c + "]"; g_regexTester.should_match(expected, pattern, option); g_regexTester.should_not_match("g", pattern, option); @@ -120,7 +120,7 @@ void check_classescape_controlescape(string expected, string c, syntax_option_ty g_regexTester.should_not_match("\\", pattern, option); } -void check_atomescape_identityescape(string c, syntax_option_type option) { +void check_atomescape_identityescape(const string& c, syntax_option_type option) { string pattern = "\\" + c; g_regexTester.should_match(c, pattern, option); g_regexTester.should_not_match("g", pattern, option); @@ -128,14 +128,14 @@ void check_atomescape_identityescape(string c, syntax_option_type option) { g_regexTester.should_not_match(pattern, pattern, option); } -void check_classescape_identityescape(string c, syntax_option_type option) { +void check_classescape_identityescape(const string& c, syntax_option_type option) { string pattern = "[\\" + c + "]"; g_regexTester.should_match(c, pattern, option); g_regexTester.should_not_match("g", pattern, option); g_regexTester.should_not_match("\\", pattern, option); } -void check_classescape_noescape(string c, syntax_option_type option) { +void check_classescape_noescape(const string& c, syntax_option_type option) { string pattern = "[\\" + c + "]"; g_regexTester.should_match(c, pattern, option); g_regexTester.should_match("\\", pattern, option); From ba2b655ad5d667f78fd15bf7c70b70b1ec892605 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 03:55:15 -0700 Subject: [PATCH 11/14] Test spaces matched against `\d` and `\D`. 
--- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 96f79b19d37..2b66cf28fe9 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -220,6 +220,7 @@ void test_gh_5244_atomescape_ecmascript() { g_regexTester.should_not_match("-", R"(\d)", ECMAScript); g_regexTester.should_not_match("\1", R"(\d)", ECMAScript); g_regexTester.should_not_match("\\", R"(\d)", ECMAScript); + g_regexTester.should_not_match(" ", R"(\d)", ECMAScript); g_regexTester.should_not_match("0", R"(\D)", ECMAScript); g_regexTester.should_not_match("5", R"(\D)", ECMAScript); g_regexTester.should_match("a", R"(\D)", ECMAScript); @@ -227,6 +228,7 @@ void test_gh_5244_atomescape_ecmascript() { g_regexTester.should_match("-", R"(\D)", ECMAScript); g_regexTester.should_match("\1", R"(\D)", ECMAScript); g_regexTester.should_match("\\", R"(\D)", ECMAScript); + g_regexTester.should_match(" ", R"(\D)", ECMAScript); g_regexTester.should_match("a", R"(\w)", ECMAScript); g_regexTester.should_match("A", R"(\w)", ECMAScript); @@ -368,6 +370,7 @@ void test_gh_5244_classescape_ecmascript() { g_regexTester.should_not_match("-", R"([\d])", ECMAScript); g_regexTester.should_not_match("\1", R"([\d])", ECMAScript); g_regexTester.should_not_match("\\", R"([\d])", ECMAScript); + g_regexTester.should_not_match(" ", R"([\d])", ECMAScript); g_regexTester.should_not_match("0", R"([\D])", ECMAScript); g_regexTester.should_not_match("5", R"([\D])", ECMAScript); g_regexTester.should_match("a", R"([\D])", ECMAScript); @@ -375,6 +378,7 @@ void test_gh_5244_classescape_ecmascript() { g_regexTester.should_match("-", R"([\D])", ECMAScript); g_regexTester.should_match("\1", R"([\D])", ECMAScript); g_regexTester.should_match("\\", R"([\D])", ECMAScript); + 
g_regexTester.should_match(" ", R"([\D])", ECMAScript); g_regexTester.should_match("a", R"([\w])", ECMAScript); g_regexTester.should_match("A", R"([\w])", ECMAScript); From e57c401d565b144e348c6a4634868e95f52eee77 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 03:55:57 -0700 Subject: [PATCH 12/14] Sort awk-only escapes. --- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 2b66cf28fe9..59da25ce327 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -491,8 +491,8 @@ void test_gh_5244_atomescape_posix_not_awk(syntax_option_type option) { g_regexTester.should_throw(R"(\b)", error_escape, option); g_regexTester.should_throw(R"(\f)", error_escape, option); g_regexTester.should_throw(R"(\n)", error_escape, option); - g_regexTester.should_throw(R"(\t)", error_escape, option); g_regexTester.should_throw(R"(\r)", error_escape, option); + g_regexTester.should_throw(R"(\t)", error_escape, option); g_regexTester.should_throw(R"(\v)", error_escape, option); g_regexTester.should_throw(R"(\")", error_escape, option); g_regexTester.should_throw(R"(\/)", error_escape, option); From ad6f36f9681110f6d2c3b99c73942fb29effeb1a Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Wed, 9 Apr 2025 04:07:58 -0700 Subject: [PATCH 13/14] Fix: `"\0"` => `"\0"s` --- .../GH_005244_regex_escape_sequences/test.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 59da25ce327..27436bae4e0 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -752,27 +752,27 @@ void test_gh_5244_classescape_awk() { g_regexTester.should_not_match("1", R"([\101])", awk); g_regexTester.should_not_match("0", R"([\101])", awk); g_regexTester.should_not_match("\1", R"([\101])", awk); - g_regexTester.should_not_match("\0", R"([\101])", awk); + g_regexTester.should_not_match("\0"s, R"([\101])", awk); g_regexTester.should_match("A", R"([\101a])", awk); g_regexTester.should_match("a", R"([\101a])", awk); g_regexTester.should_not_match("\\", R"([\101a])", awk); g_regexTester.should_not_match("1", R"([\101a])", awk); g_regexTester.should_not_match("0", R"([\101a])", awk); g_regexTester.should_not_match("\1", R"([\101a])", awk); - g_regexTester.should_not_match("\0", R"([\101a])", awk); + g_regexTester.should_not_match("\0"s, R"([\101a])", awk); g_regexTester.should_match("\33", R"([\033])", awk); g_regexTester.should_not_match("\\", R"([\033])", awk); g_regexTester.should_not_match("3", R"([\033])", awk); g_regexTester.should_not_match("0", R"([\033])", awk); g_regexTester.should_not_match("\3", R"([\033])", awk); - g_regexTester.should_not_match("\0", R"([\033])", awk); + g_regexTester.should_not_match("\0"s, R"([\033])", awk); g_regexTester.should_match("\33", R"([\033a])", awk); g_regexTester.should_match("a", R"([\033a])", awk); g_regexTester.should_not_match("\\", R"([\033a])", awk); g_regexTester.should_not_match("3", R"([\033a])", awk); g_regexTester.should_not_match("0", R"([\033a])", awk); 
g_regexTester.should_not_match("\3", R"([\033a])", awk); - g_regexTester.should_not_match("\0", R"([\033a])", awk); + g_regexTester.should_not_match("\0"s, R"([\033a])", awk); g_regexTester.should_match("\33", R"([\33])", awk); g_regexTester.should_not_match("\\", R"([\33])", awk); g_regexTester.should_not_match("3", R"([\33])", awk); @@ -786,24 +786,24 @@ void test_gh_5244_classescape_awk() { g_regexTester.should_not_match("\\", R"([\001])", awk); g_regexTester.should_not_match("1", R"([\001])", awk); g_regexTester.should_not_match("0", R"([\001])", awk); - g_regexTester.should_not_match("\0", R"([\001])", awk); + g_regexTester.should_not_match("\0"s, R"([\001])", awk); g_regexTester.should_match("\1", R"([\001a])", awk); g_regexTester.should_match("a", R"([\001a])", awk); g_regexTester.should_not_match("\\", R"([\001a])", awk); g_regexTester.should_not_match("1", R"([\001a])", awk); g_regexTester.should_not_match("0", R"([\001a])", awk); - g_regexTester.should_not_match("\0", R"([\001a])", awk); + g_regexTester.should_not_match("\0"s, R"([\001a])", awk); g_regexTester.should_match("\1", R"([\01])", awk); g_regexTester.should_not_match("\\", R"([\01])", awk); g_regexTester.should_not_match("1", R"([\01])", awk); g_regexTester.should_not_match("0", R"([\01])", awk); - g_regexTester.should_not_match("\0", R"([\01])", awk); + g_regexTester.should_not_match("\0"s, R"([\01])", awk); g_regexTester.should_match("\1", R"([\01a])", awk); g_regexTester.should_match("a", R"([\01a])", awk); g_regexTester.should_not_match("\\", R"([\01a])", awk); g_regexTester.should_not_match("1", R"([\01a])", awk); g_regexTester.should_not_match("0", R"([\01a])", awk); - g_regexTester.should_not_match("\0", R"([\01a])", awk); + g_regexTester.should_not_match("\0"s, R"([\01a])", awk); g_regexTester.should_match("\1", R"([\1])", awk); g_regexTester.should_not_match("\\", R"([\1])", awk); g_regexTester.should_not_match("1", R"([\1])", awk); From fe213845c4dd0a829c383c114e04376ea4d19901 Mon Sep 
17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 9 Apr 2025 04:13:03 -0700 Subject: [PATCH 14/14] Don't check for awk in test_gh_5244_classescape_posix_not_awk. --- tests/std/tests/GH_005244_regex_escape_sequences/test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 27436bae4e0..09641ea5dcf 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -625,7 +625,7 @@ void test_gh_5244_classescape_posix_not_awk(syntax_option_type option) { check_classescape_noescape("|", option); // TRANSITION, GH-5379 - if (option & (extended | egrep | awk)) { + if (option & (extended | egrep)) { check_classescape_noescape("(", option); check_classescape_noescape(")", option); check_classescape_noescape("{", option); @@ -637,7 +637,7 @@ void test_gh_5244_classescape_posix_not_awk(syntax_option_type option) { g_regexTester.should_not_match("\\", R"([\]])", option); // TRANSITION, GH-5379 - if (option & (extended | egrep | awk)) { + if (option & (extended | egrep)) { check_classescape_noescape("}", option); }