Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 62 additions & 47 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1679,6 +1679,7 @@ enum class _Rx_unwind_ops {
_After_neg_assert,
_Disjunction_eval_alt_on_failure,
_Disjunction_eval_alt_always,
_Do_nothing,
};

template <class _BidIt>
Expand Down Expand Up @@ -1811,10 +1812,11 @@ private:

void _Increase_stack_usage_count();
void _Decrease_stack_usage_count();
void _Increase_complexity_count();

bool _Do_rep0(_Node_rep*, bool);
bool _Do_rep(_Node_rep*, bool, int);
bool _Do_rep_first(_Node_rep*);
void _Prepare_rep(_Node_rep*);
bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
_It _Do_class(_Node_base*, _It);
bool _Match_pat(_Node_base*);
Expand Down Expand Up @@ -3403,34 +3405,19 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Decrease_stack_usage_cou
}
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_count() {
    // account for one more unit of matching work against _Max_complexity_count

    // nothing is counted or reported while the remaining count is not positive
    if (_Max_complexity_count <= 0) {
        return;
    }

    // consume one unit; once the count is used up, report error_complexity
    --_Max_complexity_count;
    if (_Max_complexity_count <= 0) {
        _Xregex_error(regex_constants::error_complexity);
    }
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy) {
// apply repetition to loop with no nested if/do
int _Ix = 0;
const size_t _Frame_idx = _Push_frame();

if (0 < _Node->_Min) {
// GH-5365: We can avoid resetting capture groups for the first iteration
// because we know that a simple repetition of this loop was not encountered before.
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
_Pop_frame(_Frame_idx);
return false;
} else if (_Tgt_state._Cur == _Frames[_Frame_idx]._Match_state._Cur) { // matches empty string
// loop is branchless, so it will only ever match empty strings
// -> skip all other matches as they don't change state and immediately try tail
_Pop_frame(_Frame_idx);
return _Match_pat(_Node->_End_rep->_Next);
} else { // loop never matches the empty string
for (_Ix = 1; _Ix < _Node->_Min; ++_Ix) { // do minimum number of reps
// GH-5365: We have to reset the capture groups from the second iteration on.
_Tgt_state._Grp_valid = _Frames[_Frame_idx]._Match_state._Grp_valid;
if (!_Match_pat(_Node->_Next)) { // didn't match minimum number of reps, fail
_Pop_frame(_Frame_idx);
return false;
}
}
}
}
int _Ix = _Node->_Min;
const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
_Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 1;

_Tgt_state_t<_It> _Final;
bool _Matched0 = false;
Expand All @@ -3439,7 +3426,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node

if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
_Pop_frame(_Frame_idx);
return true; // go with current match
}

Expand All @@ -3458,14 +3444,12 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
_Done = true;
// we only potentially accept/try tail for POSIX
if ((_Sflags & regex_constants::_Any_posix) && _Match_pat(_Node->_End_rep->_Next)) {
_Pop_frame(_Frame_idx);
return true; // go with current match
}
} else {
_Saved_pos = _Tgt_state._Cur;
if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
_Pop_frame(_Frame_idx);
return true; // go with current match
}

Expand All @@ -3489,7 +3473,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
_Saved_pos = _Tgt_state._Cur;
if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
_Pop_frame(_Frame_idx);
return true; // go with current match
}

Expand All @@ -3504,7 +3487,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
_Tgt_state = _Final;
}

_Pop_frame(_Frame_idx);
return _Matched0;
}

Expand Down Expand Up @@ -3577,12 +3559,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep(_Node_rep* _Node,
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* _Node) {
bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;
// apply repetition
if (_Node->_Simple_loop == 1) {
return _Do_rep0(_Node, _Greedy);
}
void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Prepare_rep(_Node_rep* _Node) {
_Loop_vals_v2_t* _Psav = &_Loop_vals[_Node->_Loop_number];

// Determine first capture group in repetition for later capture group reset, if not done so previously.
Expand All @@ -3593,8 +3570,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep*
_Psav->_Group_first = static_cast<unsigned int>(_Tgt_state._Grp_valid.size());
}
}

return _Do_rep(_Node, _Greedy, 0);
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
Expand Down Expand Up @@ -4153,22 +4128,58 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
break;

case _N_rep:
if (!_Do_rep_first(static_cast<_Node_rep*>(_Nx))) {
_Failed = true;
{
auto _Node = static_cast<_Node_rep*>(_Nx);
_Prepare_rep(_Node);
bool _Greedy = (_Node->_Flags & _Fl_greedy) != 0;

if (_Node->_Simple_loop == 1) {
auto& _Sav = _Loop_vals[_Node->_Loop_number];
_Sav._Loop_idx = 1;
_Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
if (_Node->_Min == 0) {
_Failed = !_Do_rep0(_Node, _Greedy);
_Next = nullptr;
} else {
_Increase_complexity_count();
}
} else {
_Failed = !_Do_rep(_Node, _Greedy, 0);
_Next = nullptr;
}
}

_Next = nullptr;
break;

case _N_end_rep:
{
_Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
if (_Nr->_Simple_loop == 0
&& !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Loop_vals[_Nr->_Loop_number]._Loop_idx)) {
_Failed = true; // recurse only if loop contains if/do
auto& _Sav = _Loop_vals[_Nr->_Loop_number];
if (_Nr->_Simple_loop != 0) {
if (_Sav._Loop_idx <= _Nr->_Min) {
if (_Sav._Loop_idx == 1
&& _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // match empty
// loop is branchless, so it will only ever match empty strings
// -> skip all other matches as they don't change state and immediately try tail
_Increase_complexity_count();
// _Next is already assigned correctly for matching tail
} else if (_Sav._Loop_idx < _Nr->_Min) { // needs at least one more rep to reach minimum
_Increase_complexity_count();
// GH-5365: We have to reset the capture groups from the second iteration on.
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
_Next = _Nr->_Next;
++_Sav._Loop_idx;
} else { // minimum number of reps reached
_Failed = !_Do_rep0(_Nr, (_Nr->_Flags & _Fl_greedy) != 0);
_Next = nullptr;
}
} else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
_Next = nullptr;
}
} else {
_Failed = !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Sav._Loop_idx);
Comment on lines +4173 to +4180
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible follow-up, no change requested for this PR: There are now two mentions of `(_Nr->_Flags & _Fl_greedy) != 0`. I'm assuming this doesn't change, so we should consider extracting it as `bool _Greedy`.

Copy link
Contributor Author

@muellerj2 muellerj2 Oct 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, probably a good idea. I just didn't give much thought to making the recursive calls look pretty because I intend to remove them soon. If the logic for simple and generic loops mostly remains separate, though, this expression will still appear in two places.

_Next = nullptr;
}

_Next = nullptr;
break;
}

Expand Down Expand Up @@ -4243,6 +4254,7 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
_Nx = _Node->_Next;
_Tgt_state = _Frame._Match_state;
_Failed = false;
_Increase_complexity_count();
if (_Node->_Child) {
_Frame._Node = _Node->_Child;
++_Frames_count;
Expand All @@ -4252,6 +4264,9 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
break;
}

case _Rx_unwind_ops::_Do_nothing:
break;

default:
#if _ITERATOR_DEBUG_LEVEL != 0
_STL_REPORT_ERROR("internal stack of regex matcher corrupted");
Expand Down