-
Notifications
You must be signed in to change notification settings - Fork 1.6k
<regex>: Remove capture extent vectors from stack frames
#5865
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
36044b4
3397e3b
65a5124
fa4cb40
7436282
2632dd1
da8659e
5ecd581
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <benchmark/benchmark.h> | ||
| #include <regex> | ||
| #include <string> | ||
|
|
||
|
|
||
| using namespace std; | ||
| using namespace regex_constants; | ||
|
|
||
| void bm_match_sequence_of_as(benchmark::State& state, const char* pattern, syntax_option_type syntax = ECMAScript) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No change requested: The |
||
| string input(static_cast<size_t>(state.range()), 'a'); | ||
StephanTLavavej marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| regex re{pattern, syntax}; | ||
|
|
||
| for (auto _ : state) { | ||
| benchmark::DoNotOptimize(input); | ||
| const char* pos = input.data(); | ||
| const char* end = input.data() + input.size(); | ||
| cmatch match; | ||
| regex_match(pos, end, match, re); | ||
| } | ||
| } | ||
|
|
||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "a*", "a*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "a*?", "a*?")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(?:a)*", "(?:a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(a)*", "(a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(?:b|a)*", "(?:b|a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(b|a)*", "(b|a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(a)(?:b|a)*", "(a)(?:b|a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(a)(b|a)*", "(a)(b|a)*")->Arg(100)->Arg(200)->Arg(400); | ||
| BENCHMARK_CAPTURE(bm_match_sequence_of_as, "(a)(?:b|a)*c", "(a)(?:b|a)*c")->Arg(100)->Arg(200)->Arg(400); | ||
StephanTLavavej marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| BENCHMARK_MAIN(); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1681,6 +1681,8 @@ enum class _Rx_unwind_ops { | |
| _Loop_nongreedy, | ||
| _Loop_greedy, | ||
| _Loop_restore_vals, | ||
| _Capture_restore_begin, | ||
| _Capture_restore_end | ||
StephanTLavavej marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| }; | ||
|
|
||
| template <class _BidIt> | ||
|
|
@@ -1689,7 +1691,7 @@ public: | |
| _Rx_unwind_ops _Code; | ||
| int _Loop_idx_sav; | ||
| _Node_base* _Node; | ||
| _Tgt_state_t<_BidIt> _Match_state; | ||
| _Bt_state_t<_BidIt> _Match_state; | ||
| size_t _Loop_frame_idx_sav; | ||
| }; | ||
|
|
||
|
|
@@ -3919,25 +3921,40 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N | |
| } | ||
|
|
||
| case _N_end_assert: | ||
| for (;;) { | ||
| --_Frames_count; | ||
| const auto& _Frame = _Frames[_Frames_count]; | ||
| const auto _Code = _Frame._Code; | ||
| if (_Code == _Rx_unwind_ops::_After_assert || _Code == _Rx_unwind_ops::_After_neg_assert) { | ||
| _Tgt_state._Cur = _Frame._Match_state._Cur; | ||
| _Decrease_stack_usage_count(); | ||
| if (_Code == _Rx_unwind_ops::_After_assert) { | ||
| _Next = _Frame._Node->_Next; | ||
| } else { | ||
| _Failed = true; | ||
| { | ||
| size_t _Last_capture_restore_frame = 0U; | ||
| for (;;) { | ||
| --_Frames_count; | ||
| const auto& _Frame = _Frames[_Frames_count]; | ||
| const auto _Code = _Frame._Code; | ||
| if (_Code == _Rx_unwind_ops::_After_assert || _Code == _Rx_unwind_ops::_After_neg_assert) { | ||
| _Tgt_state._Cur = _Frame._Match_state._Cur; | ||
| _Decrease_stack_usage_count(); | ||
| if (_Code == _Rx_unwind_ops::_After_assert) { | ||
| _Next = _Frame._Node->_Next; | ||
| if (_Last_capture_restore_frame != 0U) { | ||
| _Frames_count = static_cast<size_t>( | ||
| _STD remove_if(_Frames.begin() + static_cast<ptrdiff_t>(_Frames_count), | ||
| _Frames.begin() + static_cast<ptrdiff_t>(_Last_capture_restore_frame) + 1, | ||
| [](const auto& _Other_frame) { | ||
StephanTLavavej marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return _Other_frame._Code != _Rx_unwind_ops::_Capture_restore_begin | ||
| && _Other_frame._Code != _Rx_unwind_ops::_Capture_restore_end; | ||
| }) | ||
| - _Frames.begin()); | ||
StephanTLavavej marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| } else { | ||
| _Failed = true; | ||
| } | ||
| break; | ||
| } else if (_Code == _Rx_unwind_ops::_Disjunction_eval_alt_on_failure | ||
| || _Code == _Rx_unwind_ops::_Disjunction_eval_alt_always | ||
| || _Code == _Rx_unwind_ops::_Loop_greedy // | ||
| || _Code == _Rx_unwind_ops::_Loop_nongreedy | ||
| || _Code == _Rx_unwind_ops::_Loop_restore_vals) { | ||
| _Decrease_stack_usage_count(); | ||
| } else if (_Code == _Rx_unwind_ops::_Capture_restore_end && _Last_capture_restore_frame == 0U) { | ||
| _Last_capture_restore_frame = _Frames_count; | ||
| } | ||
| break; | ||
| } else if (_Code == _Rx_unwind_ops::_Disjunction_eval_alt_on_failure | ||
| || _Code == _Rx_unwind_ops::_Disjunction_eval_alt_always | ||
| || _Code == _Rx_unwind_ops::_Loop_greedy // | ||
| || _Code == _Rx_unwind_ops::_Loop_nongreedy | ||
| || _Code == _Rx_unwind_ops::_Loop_restore_vals) { | ||
| _Decrease_stack_usage_count(); | ||
| } | ||
| } | ||
| break; | ||
|
|
@@ -3946,7 +3963,10 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N | |
| { // record current position | ||
| _Node_capture* _Node = static_cast<_Node_capture*>(_Nx); | ||
| if (_Node->_Idx != 0U) { | ||
| _Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur; | ||
| auto& _Group = _Tgt_state._Grps[_Node->_Idx]; | ||
| auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Capture_restore_begin, _Node); | ||
| _Frames[_Frame_idx]._Match_state._Cur = _Group._Begin; | ||
| _Group._Begin = _Tgt_state._Cur; | ||
|
Comment on lines
+3969
to
+3970
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No change requested: I observe that |
||
| } | ||
| break; | ||
| } | ||
|
|
@@ -3956,8 +3976,11 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N | |
| _Node_end_group* _Node = static_cast<_Node_end_group*>(_Nx); | ||
| _Node_capture* _Node0 = static_cast<_Node_capture*>(_Node->_Back); | ||
| if (_Node0->_Idx != 0U) { // update capture data | ||
| _Tgt_state._Grp_valid[_Node0->_Idx] = true; | ||
| _Tgt_state._Grps[_Node0->_Idx]._End = _Tgt_state._Cur; | ||
| auto& _Group = _Tgt_state._Grps[_Node0->_Idx]; | ||
| auto _Frame_idx = _Push_frame(_Rx_unwind_ops::_Capture_restore_end, _Node0); | ||
| _Frames[_Frame_idx]._Match_state._Cur = _Group._End; | ||
| _Tgt_state._Grp_valid[_Node0->_Idx] = true; | ||
| _Group._End = _Tgt_state._Cur; | ||
| } | ||
| break; | ||
| } | ||
|
|
@@ -4325,6 +4348,20 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N | |
| } | ||
| break; | ||
|
|
||
| case _Rx_unwind_ops::_Capture_restore_begin: | ||
| { // restore begin of capturing group | ||
| auto _Node = static_cast<_Node_capture*>(_Frame._Node); | ||
| _Tgt_state._Grps[_Node->_Idx]._Begin = _Frame._Match_state._Cur; | ||
| } | ||
| break; | ||
|
|
||
| case _Rx_unwind_ops::_Capture_restore_end: | ||
| { // restore end of capturing group | ||
| auto _Node = static_cast<_Node_capture*>(_Frame._Node); | ||
| _Tgt_state._Grps[_Node->_Idx]._End = _Frame._Match_state._Cur; | ||
| } | ||
| break; | ||
|
|
||
| default: | ||
| #if _ITERATOR_DEBUG_LEVEL != 0 | ||
| _STL_REPORT_ERROR("internal stack of regex matcher corrupted"); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.