diff --git a/stl/inc/regex b/stl/inc/regex index 1434e738cd9..2658b3cade5 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -382,7 +382,10 @@ public: template string_type lookup_collatename(_FwdIt _First, _FwdIt _Last) const { // map [_First, _Last) to collation element - return string_type{_First, _Last}; + if (_First != _Last && _STD next(_First) == _Last) { + return string_type{_First, _Last}; + } + return string_type{}; } locale_type imbue(locale_type _Lx) { // store locale object @@ -1507,8 +1510,6 @@ public: template class _Builder { // provides operations used by _Parser to build the nfa public: - using _Difft = typename iterator_traits<_FwdIt>::difference_type; - _Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type); void _Setlong(); // _Discard_pattern is an ABI zombie name @@ -1525,8 +1526,8 @@ public: void _Add_char_to_class(_Elem _Ch); void _Add_range2(_Elem, _Elem); void _Add_named_class(typename _RxTraits::char_class_type, bool); - void _Add_equiv(_FwdIt, _FwdIt, _Difft); - void _Add_coll(_FwdIt, _FwdIt, _Difft); + void _Add_equiv2(const _Elem*, const _Elem*); + void _Add_coll2(const _Elem*, const _Elem*); _Node_base* _Begin_group(); void _End_group(_Node_base* _Back); _Node_base* _Begin_assert_group(bool); @@ -1547,7 +1548,7 @@ private: void _Add_char_to_bitmap(_Elem _Ch); void _Add_char_to_array(_Elem _Ch); void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool); - void _Char_to_elts(_FwdIt, _FwdIt, _Difft, _Sequence<_Elem>**); + void _Char_to_elts2(const _Elem*, const _Elem*, _Sequence<_Elem>**); _Root_node* _Root; _Node_base* _Current; @@ -1733,7 +1734,7 @@ private: bool _DecimalDigits3(regex_constants::error_type _Error_type, int _Initial = 0); void _HexDigits(int); bool _OctalDigits(); - void _Do_ex_class(_Meta_type); + _Prs_ret _Do_ex_class2(_Meta_type); bool _CharacterClassEscape(bool); _Prs_ret _ClassEscape3(); _Prs_ret _ClassAtom(bool); @@ -1752,6 +1753,7 @@ private: void _Quantifier(); bool _Alternative(); void _Disjunction(); + void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep); _FwdIt _Pat; _FwdIt _Begin; @@ -2952,16 +2954,17 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class(typename _RxTraits::ch } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(_FwdIt _First, _FwdIt _Last, _Difft _Diff, +void _Builder<_FwdIt, _Elem, _RxTraits>::_Char_to_elts2(const _Elem* const _First, const _Elem* const _Last, _Sequence<_Elem>** _Cur) { // add collation element to element sequence - while (*_Cur && static_cast(_Diff) < (*_Cur)->_Sz) { + auto _Diff = static_cast(_Last - _First); + while (*_Cur && _Diff < (*_Cur)->_Sz) { _Cur = &(*_Cur)->_Next; } - if (!(*_Cur) || static_cast(_Diff) != (*_Cur)->_Sz) { + if (!(*_Cur) || _Diff != (*_Cur)->_Sz) { // add new sequence holding elements of the same length _Sequence<_Elem>* _Node = *_Cur; - *_Cur = new _Sequence<_Elem>(static_cast(_Diff)); + *_Cur = new _Sequence<_Elem>(_Diff); (*_Cur)->_Next = _Node; } (*_Cur)->_Data._Insert2(_First, _Last); @@ -2978,10 +2981,15 @@ unsigned int _Builder<_FwdIt, _Elem, _RxTraits>::_Get_tmax() const { } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_equiv(_FwdIt _First, _FwdIt _Last, _Difft _Diff) { +void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_equiv2(const _Elem* const _First, const _Elem* const _Last) { // add elements of equivalence class to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); typename _RxTraits::string_type _Str = _Traits.transform_primary(_First, _Last); + + if (_Str.empty()) { + _Xregex_error(regex_constants::error_collate); + } + for (unsigned int _Ch = 0; _Ch < _Bmp_max; ++_Ch) { // add elements _Elem _Ex = static_cast<_Elem>(_Ch); if (_Traits.transform_primary(_STD addressof(_Ex), _STD addressof(_Ex) + 1) @@ -2995,16 +3003,16 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_equiv(_FwdIt _First, _FwdIt _Last, } if (_Bmp_max < static_cast(_STD _Max_limit<_Elem>())) { // map range _Sequence<_Elem>** _Cur = _STD addressof(_Node->_Equiv); - _Char_to_elts(_First, _Last, _Diff, _Cur); + _Char_to_elts2(_First, _Last, _Cur); } } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_coll(_FwdIt _First, _FwdIt _Last, _Difft _Diff) { +void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_coll2(const _Elem* const _First, const _Elem* const _Last) { // add collation element to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); _Sequence<_Elem>** _Cur = _STD addressof(_Node->_Coll); - _Char_to_elts(_First, _Last, _Diff, _Cur); + _Char_to_elts2(_First, _Last, _Cur); } template @@ -3399,11 +3407,11 @@ bool _Lookup_collating_range(const _Elem _Ch, const _Buf<_Elem>* const _Bufptr, } template -bool _Lookup_equiv(typename _RxTraits::_Uelem _Ch, const _Sequence<_Elem>* _Eq, const _RxTraits& _Traits) { +bool _Lookup_equiv2(_Elem _Ch, const _Sequence<_Elem>* _Eq, const _RxTraits& _Traits) { // check whether _Ch is in _Eq typename _RxTraits::string_type _Str0; typename _RxTraits::string_type _Str1; - _Str1.push_back(static_cast<_Elem>(_Ch)); + _Str1.push_back(_Ch); _Str1 = _Traits.transform_primary(_Str1.begin(), _Str1.end()); while (_Eq) { // look for sequence of elements that are the right size for (unsigned int _Ix = 0; _Ix < _Eq->_Data._Size(); _Ix += _Eq->_Sz) { // look for _Ch @@ -3418,22 +3426,48 @@ bool _Lookup_equiv(typename _RxTraits::_Uelem _Ch, const _Sequence<_Elem>* _Eq, return false; } -template -_BidIt _Lookup_coll(_BidIt _First, _BidIt _Last, const _Sequence<_Elem>* _Eq) { - // look for collation element [_First, _Last) in _Eq - while (_Eq) { // look for sequence of elements that are the right size - for (unsigned int _Ix = 0; _Ix < _Eq->_Data._Size(); _Ix += _Eq->_Sz) { // look for character range - _BidIt _Res = _First; - for (size_t _Jx = 0; _Jx < _Eq->_Sz; ++_Jx) { // check current character - if (*_Res++ != *(_Eq->_Data._Str() + _Ix + _Jx)) { - break; - } +template +_BidIt _Lookup_coll2(_Elem _First_ch, _BidIt _First, const _BidIt _Last, const _Sequence<_Elem>* _Seq, + const _RxTraits& _Traits, const regex_constants::syntax_option_type _Flags) { + // look for collation element [_First, _Last) in _Seq + typename _RxTraits::string_type _Str; + + // extend translated input character sequence + if (_Seq) { // the longest collating elements come first + _Str.push_back(_First_ch); + const auto _Coll_size = _Seq->_Sz; + size_t _Str_size = 1; + _BidIt _Pos = _First; + ++_Pos; + + for (; _Str_size < _Coll_size && _Pos != _Last; ++_Pos) { + _Elem _Ch = *_Pos; + if (_Flags & regex_constants::icase) { + _Ch = _Traits.translate_nocase(_Ch); + } else if (_Flags & regex_constants::collate) { + _Ch = _Traits.translate(_Ch); } - if (_Res == _Last) { - return _Last; + _Str.push_back(_Ch); + ++_Str_size; + } + } + + while (_Seq) { // look for sequence of elements that are the right size + const auto _Size = _Seq->_Sz; + + // match input character sequence to stored collating elements + if (_Str.size() >= _Size) { + const _Elem* const _Str_first = _Str.data(); + const _Elem* const _Str_last = _Str_first + _Size; + const _Elem* _Current = _Seq->_Data._Str(); + for (auto _Remaining = _Seq->_Data._Size(); _Remaining >= _Size; _Current += _Size, _Remaining -= _Size) { + if (_STD equal(_Str_first, _Str_last, _Current)) { + _STD advance(_First, static_cast<_Iter_diff_t<_BidIt>>(_Size)); + return _First; + } } } - _Eq = _Eq->_Next; + _Seq = _Seq->_Next; } return _First; } @@ -3454,7 +3488,7 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_class(_Node_base* _Nx) { // ap _It _Resx; _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx); if (_Node->_Coll - && (_Resx = _STD _Lookup_coll(_Tgt_state._Cur, _End, _Node->_Coll)) + && (_Resx = _STD _Lookup_coll2(_Ch, _Tgt_state._Cur, _End, _Node->_Coll, _Traits, _Sflags)) != _Tgt_state._Cur) { // check for collation element _Res0 = _Resx; _Found = true; @@ -3470,7 +3504,7 @@ bool _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Do_class(_Node_base* _Nx) { // ap _Found = true; } else if (_Node->_Classes != typename _RxTraits::char_class_type{} && _Traits.isctype(_Ch, _Node->_Classes)) { _Found = true; - } else if (_Node->_Equiv && _STD _Lookup_equiv(_UCh, _Node->_Equiv, _Traits)) { + } else if (_Node->_Equiv && _STD _Lookup_equiv2(_Ch, _Node->_Equiv, _Traits)) { _Found = true; } else { _Found = false; @@ -3811,10 +3845,9 @@ _BidIt _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Skip(_BidIt _First_arg, _BidIt const auto _UCh = static_cast(_Ch); _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Nx); - _It _Next = _First_arg; - ++_Next; - if (_Node->_Coll && _STD _Lookup_coll(_First_arg, _Next, _Node->_Coll) != _First_arg) { + if (_Node->_Coll + && _STD _Lookup_coll2(_Ch, _First_arg, _Last, _Node->_Coll, _Traits, _Sflags) != _First_arg) { _Found = true; } else if (_Node->_Ranges && (_Sflags & regex_constants::collate @@ -3830,7 +3863,7 @@ _BidIt _Matcher<_BidIt, _Elem, _RxTraits, _It>::_Skip(_BidIt _First_arg, _BidIt } else if (_Node->_Classes != typename _RxTraits::char_class_type{} && _Traits.isctype(_Ch, _Node->_Classes)) { _Found = true; - } else if (_Node->_Equiv && _STD _Lookup_equiv(_UCh, _Node->_Equiv, _Traits)) { + } else if (_Node->_Equiv && _STD _Lookup_equiv2(_Ch, _Node->_Equiv, _Traits)) { _Found = true; } else { _Found = false; @@ -4074,45 +4107,68 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 oc } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class( +_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class2( _Meta_type _End_arg) { // handle delimited expressions within bracket expression - regex_constants::error_type _Errtype = (_End_arg == _Meta_colon ? regex_constants::error_ctype - : _End_arg == _Meta_equal ? regex_constants::error_collate - : _End_arg == _Meta_dot ? regex_constants::error_collate - : regex_constants::error_syntax); - _FwdIt _Beg = _Pat; - _Iter_diff_t<_FwdIt> _Diff = 0; + const regex_constants::error_type _Errtype = + _End_arg == _Meta_colon ? regex_constants::error_ctype : regex_constants::error_collate; + const _FwdIt _Beg = _Pat; while (_Mchar != _Meta_colon && _Mchar != _Meta_equal && _Mchar != _Meta_dot && _Mchar != _Meta_eos) { // advance to end delimiter _Next(); - ++_Diff; } - if (_Mchar != _End_arg) { - _Error(_Errtype); - } else if (_End_arg == _Meta_colon) { // handle named character class + + const _FwdIt _End = _Pat; + _Expect(_End_arg, _Errtype); + _Expect(_Meta_rsq, _Errtype); + + if (_End_arg == _Meta_colon) { // handle named character class typename _RxTraits::char_class_type _Cls = - _Traits.lookup_classname(_Beg, _Pat, (_Flags & regex_constants::icase) != 0); + _Traits.lookup_classname(_Beg, _End, (_Flags & regex_constants::icase) != 0); if (!_Cls) { _Error(regex_constants::error_ctype); } _Nfa._Add_named_class(_Cls, false); - } else if (_End_arg == _Meta_equal) { // process = - if (_Beg == _Pat) { + return _Prs_set; + } else { + typename _RxTraits::string_type _Coll_elem = _Traits.lookup_collatename(_Beg, _End); + const auto _Size = _Coll_elem.size(); + + if (_Size == 0) { _Error(regex_constants::error_collate); - } else { - _Nfa._Add_equiv(_Beg, _Pat, _Diff); } - } else if (_End_arg == _Meta_dot) { // process . - if (_Beg == _Pat) { - _Error(regex_constants::error_collate); - } else { - _Nfa._Add_coll(_Beg, _Pat, _Diff); + + if (_Size > _Max_limit()) { + _Error(regex_constants::error_space); + } + + _Elem* const _Coll_elem_first = &_Coll_elem.front(); + const _Elem* const _Coll_elem_last = _Coll_elem_first + _Size; + if (_End_arg == _Meta_equal) { // process equivalence + _Nfa._Add_equiv2(_Coll_elem_first, _Coll_elem_last); + return _Prs_set; + } else { // process collating element + if (_Size == 1) { + _Val = *_Coll_elem_first; + return _Prs_chr; + } + + // Character ranges with multi-character bounds cannot be represented in NFA nodes yet (see GH-5391). + // Provisionally treat multi-character collating elements as character sets. + if (_Flags & regex_constants::icase) { + for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) { + *_Current = _Traits.translate_nocase(*_Current); + } + } else if (_Flags & regex_constants::collate) { + for (auto _Current = _Coll_elem_first; _Current != _Coll_elem_last; ++_Current) { + *_Current = _Traits.translate(*_Current); + } + } + _Nfa._Add_coll2(_Coll_elem_first, _Coll_elem_last); + return _Prs_set; } } - _Next(); - _Expect(_Meta_rsq, _Errtype); } template @@ -4172,8 +4228,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // if (_Mchar == _Meta_colon || _Mchar == _Meta_equal || _Mchar == _Meta_dot) { // handle delimited expression _Meta_type _St = _Mchar; _Next(); - _Do_ex_class(_St); - return _Prs_set; + return _Do_ex_class2(_St); } else { // handle ordinary [ _Val = _Meta_lsq; return _Prs_chr; @@ -4621,7 +4676,9 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disj } } -inline void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep) { +template +void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( + _Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep) { // walks regex NFA, calculates values of _Node_rep::_Simple_loop for (; _Nx != _Ne && _Nx; _Nx = _Nx->_Next) { switch (_Nx->_Kind) { @@ -4662,6 +4719,19 @@ inline void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_r _Outer_rep = nullptr; } break; + case _N_class: + if (_Outer_rep) { + // _Node_rep is not simple if a class can match character sequences of different lengths + auto _Node = static_cast*>(_Nx); + bool _Coll_diff_size = + _Node->_Coll + && (_Node->_Small || _Node->_Large || _Node->_Ranges || _Node->_Classes || _Node->_Coll->_Next); + if (_Coll_diff_size || _Node->_Equiv + || ((_Flags & regex_constants::collate) && (_Node->_Ranges || (_Node->_Flags & _Fl_negate)))) { + _Outer_rep->_Simple_loop = 0; + } + } + break; case _N_none: case _N_nop: case _N_bol: @@ -4669,7 +4739,6 @@ inline void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_r case _N_wbound: case _N_dot: case _N_str: - case _N_class: case _N_group: case _N_end_group: case _N_end_assert: diff --git a/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp b/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp index afa5b36d162..75688bb31c2 100644 --- a/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp +++ b/tests/std/tests/GH_005204_regex_collating_ranges/test.cpp @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include #include @@ -338,8 +339,238 @@ void test_collating_ranges_german() { #endif // !defined(SKIP_COLLATE_TESTS) } +class gh_994_regex_traits : public regex_traits { +public: + template + string_type lookup_collatename(FwdIt first, FwdIt last) const { + // from Hungarian + const string_type collating_symbols[] = {"cs", "Cs", "dzs"}; + const string_type hyphen_name = "hyphen"; + + for (const string_type& coll_symbol : collating_symbols) { + if (equal(first, last, begin(coll_symbol), end(coll_symbol))) { + return coll_symbol; + } + } + + if (equal(first, last, begin(hyphen_name), end(hyphen_name))) { + return "-"; + } + + return regex_traits::lookup_collatename(first, last); + } +}; + +using gh_994_regex = basic_regex; + +void gh_994_verify_match( + const string& subject, const string& pattern, const bool correct, const syntax_option_type syntax = ECMAScript) { + + gh_994_regex r; + try { + r.assign(pattern, syntax); + } catch (const regex_error& e) { + printf(R"(Failed to construct regex("%s", 0x%X) for traits gh_994_regex_traits: "%s")" + "\n", + pattern.c_str(), static_cast(syntax), e.what()); + g_regexTester.fail_regex(); + return; + } + + try { + if (regex_match(subject, r) != correct) { + printf(R"(Expected regex_match("%s", regex("%s", 0x%X)) to be %s for traits gh_994_regex_traits.)" + "\n", + subject.c_str(), pattern.c_str(), static_cast(syntax), correct ? "true" : "false"); + g_regexTester.fail_regex(); + } + } catch (const regex_error& e) { + printf(R"(Failed to regex_match("%s", regex("%s", 0x%X)) for traits gh_994_regex_traits: regex_error: "%s")" + "\n", + subject.c_str(), pattern.c_str(), static_cast(syntax), e.what()); + g_regexTester.fail_regex(); + } +} + +void gh_994_should_throw( + const string& pattern, const error_type expected_code, const syntax_option_type syntax = ECMAScript) { + + try { + gh_994_regex r(pattern, syntax); + + printf(R"(regex r("%s", 0x%X) succeeded for traits gh_994_regex_traits (which is bad).)" + "\n", + pattern.c_str(), static_cast(syntax)); + g_regexTester.fail_regex(); + } catch (const regex_error& e) { + if (e.code() != expected_code) { + printf(R"(regex r("%s", 0x%X) threw 0x%X for traits gh_994_regex_traits; expected 0x%X)" + "\n", + pattern.c_str(), static_cast(syntax), static_cast(e.code()), + static_cast(expected_code)); + g_regexTester.fail_regex(); + } + } +} + +void test_gh_994() { + // GH-994: Regex with collating symbol erroneously returns a match + // PR fixed parsing and matching of collating symbols and equivalences in character classes + + g_regexTester.should_not_match("v", "[[.(.]a[a]"); + + g_regexTester.should_not_match("(((v", "[[.(.]]*"); + g_regexTester.should_not_match("v", "[[.(.]]*"); + g_regexTester.should_not_match("vv", "[[.(.]]*"); + + g_regexTester.should_match("xxx", "[[.(.]x]*"); + g_regexTester.should_match("x((x(", "[[.(.]x]*"); + g_regexTester.should_not_match("xxxv", "[[.(.]x]*"); + g_regexTester.should_not_match("xxxvv", "[[.(.]x]*"); + g_regexTester.should_not_match("x(xv", "[[.(.]x]*"); + g_regexTester.should_not_match("v", "[[.(.]x]*"); + g_regexTester.should_not_match("vv", "[[.(.]x]*"); + g_regexTester.should_not_match("xxxv", "[[.(.]x]*"); + + g_regexTester.should_throw("[[.whatisthis.]]", error_collate); + + gh_994_verify_match("a", "[[.cs.]a]", true); + gh_994_verify_match("c", "[[.cs.]a]", false); + gh_994_verify_match("ca", "[[.cs.]a]", false); + gh_994_verify_match("ct", "[[.cs.]a]", false); + gh_994_verify_match("cs", "[[.cs.]a]", true); + gh_994_verify_match("Cs", "[[.cs.]a]", false); + gh_994_verify_match("dsz", "[[.cs.]a]", false); + gh_994_should_throw("[[.CS.]]", error_collate); + + gh_994_verify_match("cs", "[[.cs.][.dzs.]]", true); + gh_994_verify_match("dzs", "[[.cs.][.dzs.]]", true); + gh_994_verify_match("dz", "[[.cs.][.dzs.]]", false); + gh_994_verify_match("Cs", "[[.cs.][.dzs.]]", false); + gh_994_verify_match("cdzs", "[[.cs.][.dzs.]]", false); + gh_994_verify_match("csdzs", "[[.cs.][.dzs.]]", false); + gh_994_verify_match("a", "[[.cs.][.dzs.]]", false); + gh_994_verify_match("dzt", "[[.cs.][.dzs.]]", false); + + gh_994_verify_match("csa", "[[.cs.][.dzs.]]a", true); + gh_994_verify_match("csb", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("Csa", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("dzsa", "[[.cs.][.dzs.]]a", true); + gh_994_verify_match("dzsb", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("dza", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("cdzsa", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("csdzsa", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("a", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("aa", "[[.cs.][.dzs.]]a", false); + gh_994_verify_match("dzta", "[[.cs.][.dzs.]]a", false); + + gh_994_verify_match("dzscs", "[[.cs.][.dzs.]a]*", true); + gh_994_verify_match("dzsacs", "[[.cs.][.dzs.]a]*", true); + gh_994_verify_match("dzsbcsa", "[[.cs.][.dzs.]a]*", false); + gh_994_verify_match("dzscsb", "[[.cs.][.dzs.]a]*", false); + gh_994_verify_match("dzscsb", "[[.cs.][.dzs.]a]*b", true); + gh_994_verify_match("dzsCsb", "[[.cs.][.dzs.]a]*b", false); + gh_994_verify_match("bdzscs", "[[.cs.][.dzs.]a]*", false); + gh_994_verify_match("bdzscs", "b[[.cs.][.dzs.]a]*", true); + + gh_994_verify_match("-", "[[.hyphen.]]", true); + gh_994_verify_match("hyphen", "[[.hyphen.]]", false); + gh_994_verify_match("h", "[[.hyphen.]]", false); + gh_994_verify_match("y", "[[.hyphen.]]", false); + gh_994_verify_match("n", "[[.hyphen.]]", false); + + gh_994_verify_match("cs", "[[.cs.]]", true, icase); + gh_994_verify_match("Cs", "[[.cs.]]", true, icase); + gh_994_verify_match("CS", "[[.cs.]]", true, icase); + gh_994_verify_match("cs", "[[.Cs.]]", true, icase); + gh_994_verify_match("Cs", "[[.Cs.]]", true, icase); + gh_994_verify_match("CS", "[[.Cs.]]", true, icase); + gh_994_should_throw("[[.CS.]]", error_collate, icase); + gh_994_verify_match("dzscsb", "[[.Cs.][.dzs.]a]*", false, icase); + gh_994_verify_match("dzscsb", "[[.cs.][.dzs.]a]*b", true, icase); + gh_994_verify_match("dzsCsb", "[[.cs.][.dzs.]a]*b", true, icase); + gh_994_verify_match("DzsCsb", "[[.cs.][.dzs.]a]*b", true, icase); + + gh_994_verify_match("cs", "[[.cs.]]", true, regex_constants::collate); + gh_994_verify_match("Cs", "[[.cs.]]", false, regex_constants::collate); + gh_994_verify_match("CS", "[[.cs.]]", false, regex_constants::collate); + gh_994_verify_match("cs", "[[.Cs.]]", false, regex_constants::collate); + gh_994_verify_match("Cs", "[[.Cs.]]", true, regex_constants::collate); + gh_994_verify_match("CS", "[[.Cs.]]", false, regex_constants::collate); + gh_994_should_throw("[[.CS.]]", error_collate, regex_constants::collate); + gh_994_verify_match("dzscsb", "[[.cs.][.dzs.]a]*", false, regex_constants::collate); + gh_994_verify_match("dzscsb", "[[.cs.][.dzs.]a]*b", true, regex_constants::collate); + gh_994_verify_match("dzsCsb", "[[.cs.][.dzs.]a]*b", false, regex_constants::collate); + gh_994_verify_match("DzsCsb", "[[.cs.][.dzs.]a]*b", false, regex_constants::collate); + + g_regexTester.should_match("b", "[[.b.]-f]"); + g_regexTester.should_match("f", "[[.b.]-f]"); + g_regexTester.should_not_match("a", "[[.b.]-f]"); + g_regexTester.should_not_match("g", "[[.b.]-f]"); + g_regexTester.should_match("b", "[b-[.f.]]"); + g_regexTester.should_match("f", "[b-[.f.]]"); + g_regexTester.should_not_match("a", "[b-[.f.]]"); + g_regexTester.should_not_match("g", "[b-[.f.]]"); + g_regexTester.should_match("b", "[[.b.]-[.f.]]"); + g_regexTester.should_match("f", "[[.b.]-[.f.]]"); + g_regexTester.should_not_match("a", "[[.b.]-[.f.]]"); + g_regexTester.should_not_match("g", "[[.b.]-[.f.]]"); + + g_regexTester.should_match("bi", "[[.b.]-f]i"); + g_regexTester.should_match("fi", "[[.b.]-f]i"); + g_regexTester.should_not_match("ai", "[[.b.]-f]i"); + g_regexTester.should_not_match("gi", "[[.b.]-f]i"); + g_regexTester.should_not_match("i", "[[.b.]-f]i"); + g_regexTester.should_match("bi", "[b-[.f.]]i"); + g_regexTester.should_match("fi", "[b-[.f.]]i"); + g_regexTester.should_not_match("ai", "[b-[.f.]]i"); + g_regexTester.should_not_match("gi", "[b-[.f.]]i"); + g_regexTester.should_not_match("i", "[b-[.f.]]i"); + g_regexTester.should_match("bi", "[[.b.]-[.f.]]i"); + g_regexTester.should_match("fi", "[[.b.]-[.f.]]i"); + g_regexTester.should_not_match("ai", "[[.b.]-[.f.]]i"); + g_regexTester.should_not_match("gi", "[[.b.]-[.f.]]i"); + g_regexTester.should_not_match("i", "[[.b.]-[.f.]]i"); + + g_regexTester.should_match("becdfi", "[[.b.]-[.f.]]*i"); + g_regexTester.should_not_match("becdfb", "[[.b.]-[.f.]]*i"); + g_regexTester.should_not_match("becdfj", "[[.b.]-[.f.]]*i"); + + // TRANSITION, GH-5391 + gh_994_should_throw("[[.cs.]-f]", error_range); + gh_994_should_throw("[a-[.cs.]]", error_range); + gh_994_should_throw("[[.cs.]-[.dzs.]]", error_range); + +#ifndef SKIP_COLLATE_TESTS + g_regexTester.should_throw("[[=a=]-c]", error_range); + g_regexTester.should_throw("[c-[=z=]]", error_range); + g_regexTester.should_throw("[[=a=]-[=z=]]", error_range); + + g_regexTester.should_match("a", "[[=a=]]"); + g_regexTester.should_match("A", "[[=a=]]"); + g_regexTester.should_not_match("b", "[[=a=]]"); + g_regexTester.should_not_match("B", "[[=a=]]"); + g_regexTester.should_match("z", "[[=Z=]]"); + g_regexTester.should_match("Z", "[[=Z=]]"); + g_regexTester.should_not_match("b", "[[=Z=]]"); + g_regexTester.should_not_match("B", "[[=Z=]]"); + + g_regexTester.should_match("ab", "[[=a=]]b"); + g_regexTester.should_match("Ab", "[[=a=]]b"); + g_regexTester.should_not_match("Ab", "[[=a=]]B"); + g_regexTester.should_not_match("b", "[[=a=]]b"); + g_regexTester.should_not_match("aab", "[[=a=]]b"); + g_regexTester.should_not_match("B", "[[=a=]]b"); + + g_regexTester.should_match("AaAaaAaab", "[[=a=]]*b"); + g_regexTester.should_not_match("AaAaaAaab", "[[=a=]]*c"); + g_regexTester.should_match("AaAabcaAaad", "[[=a=]bc]*d"); +#endif // !defined(SKIP_COLLATE_TESTS) +} + int main() { test_collating_ranges_german(); + test_gh_994(); return g_regexTester.result(); } diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 09641ea5dcf..370f2ea02f9 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -53,6 +53,11 @@ class test_regex_traits { return inner.transform_primary(first, last); } + template + string_type lookup_collatename(FwdIt first, FwdIt last) const { + return inner.lookup_collatename(first, last); + } + template char_class_type lookup_classname(FwdIt first, FwdIt last, bool icase = false) const { FwdIt next = first; diff --git a/tests/tr1/tests/regex1/test.cpp b/tests/tr1/tests/regex1/test.cpp index e633d92aa67..a9f4689b176 100644 --- a/tests/tr1/tests/regex1/test.cpp +++ b/tests/tr1/tests/regex1/test.cpp @@ -181,7 +181,7 @@ static void test_traits() { // test template regex_traits CHECK(v0.lookup_classname(class_names[i], class_names[i] + xlen(class_names[i])) != 0); } CHECK(v0.lookup_classname(carr, carr) == 0); - CHECK(v0.lookup_collatename(carr, carr + xlen(carr)) != STDString()); + CHECK(v0.lookup_collatename(carr, carr + xlen(carr)) == STDString()); CHECK(v0.isctype('0', v0.lookup_classname(class_names[0], class_names[0] + xlen(class_names[0])))); CHECK_INT(v0.value('7', 8), 7); CHECK_INT(v0.value('9', 8), -1); diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp index 630b96463ac..44cb5632126 100644 --- a/tests/tr1/tests/regex2/test.cpp +++ b/tests/tr1/tests/regex2/test.cpp @@ -588,7 +588,6 @@ static const regex_test tests[] = { {__LINE__, T("[[:xdigit:]]"), T("g"), "0", ALL}, {__LINE__, T("[[:xdigit:]]"), T("1"), "1 0 1", ALL}, {__LINE__, T("[[:xdigit:]]"), T(" "), "0", ALL}, - {__LINE__, T("[[.ch.]]"), T("ch"), "1 0 2", ALL}, {__LINE__, T("[[=x=]]"), T("X"), "1 0 1", ALL}, {__LINE__, T("[[=x=]]"), T("x"), "1 0 1", ALL},