Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ add_benchmark(nth_element src/nth_element.cpp)
add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(regex_search src/regex_search.cpp)
add_benchmark(remove src/remove.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(search src/search.cpp)
Expand Down
38 changes: 38 additions & 0 deletions benchmarks/src/regex_search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <benchmark/benchmark.h>
#include <regex>
#include <string>

#include "lorem.hpp"

using namespace std;

// Benchmarks repeated regex_search calls over a long text.
//
// state   - benchmark state; state.range() gives the number of times the base
//           text (lorem_ipsum) is doubled before the timed loop starts.
// pattern - ECMAScript regex source compiled once, outside the timed loop.
//
// Each timed iteration scans the whole text, restarting the search one
// character past the end of the previous match until no further match is found
// or a match ends exactly at the end of the text.
void bm_lorem_search(benchmark::State& state, const char* pattern) {
    string haystack{lorem_ipsum};
    for (auto doublings_left = state.range(); doublings_left > 0; --doublings_left) {
        haystack += haystack; // each pass doubles the text length
    }
    regex re{pattern};

    for (auto _ : state) {
        benchmark::DoNotOptimize(haystack);
        const char* cursor     = haystack.data();
        const char* const last = haystack.data() + haystack.size();
        cmatch results;
        while (regex_search(cursor, last, results, re)) {
            benchmark::DoNotOptimize(results);
            cursor = results[0].second;
            if (cursor == last) {
                // The match reached the end of the text; stepping further would leave the valid range.
                break;
            }
            // Step one character beyond the match end before searching again.
            ++cursor;
        }
    }
}

// Register bm_lorem_search once per pattern: a plain literal, the same literal inside a
// capturing group repeated with '+', and inside a non-capturing group repeated with '+'.
// The second macro argument names the benchmark case; the third is forwarded as `pattern`.
// Each Arg(k) is read by bm_lorem_search through state.range() as the number of times
// the text is doubled, so the searched text is 4, 8, and 16 copies of lorem_ipsum.
BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);

// Google Benchmark macro that provides the program's main() and runs the registered benchmarks.
BENCHMARK_MAIN();
10 changes: 9 additions & 1 deletion stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -4131,6 +4131,15 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
break;
}

case _N_rep:
{
_Node_rep* _Node = static_cast<_Node_rep*>(_Nx);
if (_Node->_Min == 0) {
return _First_arg;
}
break;
}

case _N_begin:
break;

Expand All @@ -4142,7 +4151,6 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
case _N_neg_assert:
case _N_back:
case _N_endif:
case _N_rep:
case _N_end_rep:
default:
return _First_arg;
Expand Down
12 changes: 7 additions & 5 deletions tests/std/include/test_regex_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,9 @@ class test_regex {
submatches_success = false;
break;
}
} else if (!actual_capture.matched || actual_capture.first != (mr[0].first + expected_capture.first)
|| actual_capture.second != (mr[0].first + expected_capture.second)) {
} else if (!actual_capture.matched
|| actual_capture.first != (subject.begin() + expected_capture.first)
|| actual_capture.second != (subject.begin() + expected_capture.second)) {
submatches_success = false;
break;
}
Expand All @@ -297,7 +298,8 @@ class test_regex {
for (const auto& expected_capture : capture_groups) {
std::string capture = "(unmatched)";
if (expected_capture.first != -1) {
capture.assign(mr[0].first + expected_capture.first, mr[0].first + expected_capture.second);
capture.assign(
subject.begin() + expected_capture.first, subject.begin() + expected_capture.second);
}
printf(R"(%s"%s" [%td %td])", initial ? "" : ", ", capture.c_str(), expected_capture.first,
expected_capture.second);
Expand All @@ -313,8 +315,8 @@ class test_regex {
std::ptrdiff_t last = -1;
if (actual_capture.matched) {
capture = actual_capture.str();
first = actual_capture.first - mr[0].first;
last = actual_capture.second - mr[0].first;
first = actual_capture.first - subject.begin();
last = actual_capture.second - subject.begin();
}
printf(R"(%s"%s" [%td %td])", initial ? "" : ", ", capture.c_str(), first, last);
initial = false;
Expand Down
49 changes: 49 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1698,6 +1698,54 @@ void test_gh_5377() {
}
}

void test_gh_5509() {
// GH-5507 extended the matcher's skip optimization
// to regexes starting with a loop with at least one repetition,
// speeding up searches for such regexes

{
test_regex char_plus_regex(&g_regexTester, "(a+)");
char_plus_regex.should_search_match_capture_groups("blwerofaaweraf", "aa", match_default, {{7, 9}});
char_plus_regex.should_search_fail("blwerofwerf");
}

{
test_regex charclass_plus_regex(&g_regexTester, "([fa]+)");
charclass_plus_regex.should_search_match_capture_groups("blwerofaaweraf", "faa", match_default, {{6, 9}});
charclass_plus_regex.should_search_fail("blwerower");
}

{
test_regex string_plus_regex(&g_regexTester, "((?:aw)+)");
string_plus_regex.should_search_match_capture_groups("blwerofaawaweraf", "awaw", match_default, {{8, 12}});
string_plus_regex.should_search_fail("blwerofaerwaf");
}

{
test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)");
anchored_string_plus_regex.should_search_match_capture_groups(
"blwerofa\nawaweraf", "aw", match_default, {{9, 11}});
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
}

{
test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)");
anchored_string_plus_regex.should_search_match_capture_groups(
"blwerofa\nawaweraf", "\naw", match_default, {{8, 11}});
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
}

{
test_regex string_star_string_regex(&g_regexTester, "((?:aw)*fa)");
string_star_string_regex.should_search_match_capture_groups(
"blwerofaawawfaeraf", "fa", match_default, {{6, 8}});
string_star_string_regex.should_search_match_capture_groups(
"blweroawawfaeraf", "awawfa", match_default, {{6, 12}});
string_star_string_regex.should_search_match("blwerofaerwaf", "fa");
string_star_string_regex.should_search_fail("blweroerwaf");
}
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -1744,6 +1792,7 @@ int main() {
test_gh_5371();
test_gh_5374();
test_gh_5377();
test_gh_5509();

return g_regexTester.result();
}