Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ add_benchmark(nth_element src/nth_element.cpp)
add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
add_benchmark(regex_search src/regex_search.cpp)
add_benchmark(remove src/remove.cpp)
add_benchmark(replace src/replace.cpp)
add_benchmark(reverse src/reverse.cpp)
Expand Down
38 changes: 38 additions & 0 deletions benchmarks/src/regex_search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <benchmark/benchmark.h>
#include <regex>
#include <string>

#include "lorem.hpp"

using namespace std;

// Benchmarks searching a large text for every occurrence of `pattern`.
//
// The haystack starts as `lorem_ipsum` and is appended to itself `state.range()` times,
// so its length doubles with each step. The regex is compiled once, outside the timed loop.
// Each timed iteration scans the whole haystack, restarting the search one character past
// the end of the previous match until no further match is found or the end is reached.
void bm_lorem_search(benchmark::State& state, const char* pattern) {
    string haystack{lorem_ipsum};
    const auto doublings = state.range();
    for (long long step = 0; step < doublings; ++step) {
        haystack += haystack;
    }
    const regex compiled{pattern};

    for (auto _ : state) {
        benchmark::DoNotOptimize(haystack);
        const char* const last = haystack.data() + haystack.size();
        const char* cursor     = haystack.data();
        cmatch result;
        while (regex_search(cursor, last, result, compiled)) {
            benchmark::DoNotOptimize(result);
            cursor = result[0].second;
            if (cursor == last) {
                break; // the match ended exactly at the end of the text; nothing left to scan
            }
            ++cursor; // resume one character after the end of the previous match
        }
    }
}

// Register bm_lorem_search once per pattern. Each Arg(n) value is read back through
// state.range() and is the number of times the lorem ipsum text is appended to itself
// (i.e. doubled in length) before the timed searches run.

// Plain literal pattern.
BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
// The same literal inside a capturing group that is repeated one or more times.
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
// The same repetition, using a non-capturing group instead.
BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);

BENCHMARK_MAIN();
10 changes: 9 additions & 1 deletion stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -4128,6 +4128,15 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
break;
}

case _N_rep:
{
_Node_rep* _Node = static_cast<_Node_rep*>(_Nx);
if (_Node->_Min == 0) {
return _First_arg;
}
break;
}

case _N_begin:
break;

Expand All @@ -4139,7 +4148,6 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
case _N_neg_assert:
case _N_back:
case _N_endif:
case _N_rep:
case _N_end_rep:
default:
return _First_arg;
Expand Down
12 changes: 7 additions & 5 deletions tests/std/include/test_regex_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,9 @@ class test_regex {
submatches_success = false;
break;
}
} else if (!actual_capture.matched || actual_capture.first != (mr[0].first + expected_capture.first)
|| actual_capture.second != (mr[0].first + expected_capture.second)) {
} else if (!actual_capture.matched
|| actual_capture.first != (subject.begin() + expected_capture.first)
|| actual_capture.second != (subject.begin() + expected_capture.second)) {
submatches_success = false;
break;
}
Expand All @@ -297,7 +298,8 @@ class test_regex {
for (const auto& expected_capture : capture_groups) {
std::string capture = "(unmatched)";
if (expected_capture.first != -1) {
capture.assign(mr[0].first + expected_capture.first, mr[0].first + expected_capture.second);
capture.assign(
subject.begin() + expected_capture.first, subject.begin() + expected_capture.second);
}
printf(R"(%s"%s" [%td %td])", initial ? "" : ", ", capture.c_str(), expected_capture.first,
expected_capture.second);
Expand All @@ -313,8 +315,8 @@ class test_regex {
std::ptrdiff_t last = -1;
if (actual_capture.matched) {
capture = actual_capture.str();
first = actual_capture.first - mr[0].first;
last = actual_capture.second - mr[0].first;
first = actual_capture.first - subject.begin();
last = actual_capture.second - subject.begin();
}
printf(R"(%s"%s" [%td %td])", initial ? "" : ", ", capture.c_str(), first, last);
initial = false;
Expand Down
49 changes: 49 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,54 @@ void test_gh_5490() {
}
}

void test_gh_5509() {
// GH-5509 extended the matcher's skip optimization
// to regexes starting with a loop with at least one repetition,
// speeding up searches for such regexes

{
test_regex char_plus_regex(&g_regexTester, "(a+)");
char_plus_regex.should_search_match_capture_groups("blwerofaaweraf", "aa", match_default, {{7, 9}});
char_plus_regex.should_search_fail("blwerofwerf");
}

{
test_regex charclass_plus_regex(&g_regexTester, "([fa]+)");
charclass_plus_regex.should_search_match_capture_groups("blwerofaaweraf", "faa", match_default, {{6, 9}});
charclass_plus_regex.should_search_fail("blwerower");
}

{
test_regex string_plus_regex(&g_regexTester, "((?:aw)+)");
string_plus_regex.should_search_match_capture_groups("blwerofaawaweraf", "awaw", match_default, {{8, 12}});
string_plus_regex.should_search_fail("blwerofaerwaf");
}

{
test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)");
anchored_string_plus_regex.should_search_match_capture_groups(
"blwerofa\nawaweraf", "aw", match_default, {{9, 11}});
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
}

{
test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)");
anchored_string_plus_regex.should_search_match_capture_groups(
"blwerofa\nawaweraf", "\naw", match_default, {{8, 11}});
anchored_string_plus_regex.should_search_fail("blwerof\naerwaf");
}

{
test_regex string_star_string_regex(&g_regexTester, "((?:aw)*fa)");
string_star_string_regex.should_search_match_capture_groups(
"blwerofaawawfaeraf", "fa", match_default, {{6, 8}});
string_star_string_regex.should_search_match_capture_groups(
"blweroawawfaeraf", "awawfa", match_default, {{6, 12}});
string_star_string_regex.should_search_match("blwerofaerwaf", "fa");
string_star_string_regex.should_search_fail("blweroerwaf");
}
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -1936,6 +1984,7 @@ int main() {
test_gh_5374();
test_gh_5377();
test_gh_5490();
test_gh_5509();

return g_regexTester.result();
}