Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vectorize std::search of 1 and 2 bytes elements with pcmpestri #4745

Merged
merged 49 commits into from
Sep 9, 2024
Merged
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
73c96da
vectorize search
AlexGuteniev May 5, 2024
0c17a53
very tail fix
AlexGuteniev May 5, 2024
11c05ee
I 🧡 ADL
AlexGuteniev May 5, 2024
d4fcc96
unify ipsum
AlexGuteniev May 5, 2024
da5cf2e
-newline
AlexGuteniev May 5, 2024
da157b1
`strstr` for competition
AlexGuteniev May 5, 2024
772c513
missing progress
AlexGuteniev May 5, 2024
2c6c329
coverage
AlexGuteniev May 5, 2024
81a6000
these tests are too long
AlexGuteniev May 5, 2024
0b59b2e
missing include
AlexGuteniev May 5, 2024
f2806c5
default_searcher
AlexGuteniev May 5, 2024
15e54a9
ADL again
AlexGuteniev May 5, 2024
26646fe
avoid `memcmp` in fallback
AlexGuteniev May 5, 2024
0c473a4
partial review comment
AlexGuteniev Jun 7, 2024
3452fcc
Merge branch 'main' into search
StephanTLavavej Jun 10, 2024
629afd4
Internal static assert `sizeof(_Ty1) == sizeof(_Ty2)`.
StephanTLavavej Jun 10, 2024
a24e6eb
Use `+=` and `+` instead of `_RANGES next`.
StephanTLavavej Jun 10, 2024
9d07a40
Style: Return `_Ptr_res1` instead of `_Ptr_last1` when they're equal.
StephanTLavavej Jun 10, 2024
d57f9b6
Style: In `<algorithm>` and `<functional>`, `_Ptr_last1` doesn't need…
StephanTLavavej Jun 10, 2024
e51b98d
Restore top-level constness for `_UFirst2`.
StephanTLavavej Jun 10, 2024
d4462a5
Benchmark classic search().
StephanTLavavej Jun 10, 2024
95ba820
Simplify `last_known_good_search()`.
StephanTLavavej Jun 10, 2024
72a0d29
Revert vectorized implementation.
StephanTLavavej Jun 10, 2024
38b32d6
Drop `memcmp` paths from `_Equal_rev_pred_unchecked` and `_Equal_rev_…
StephanTLavavej Jun 10, 2024
1e16233
Merge remote-tracking branch 'upstream/main' into search
AlexGuteniev Jun 20, 2024
f269d6c
Revert "Revert vectorized implementation."
AlexGuteniev Jun 20, 2024
dc7eb5b
drop 4 and 8 bytes search optimization for now
AlexGuteniev Jun 24, 2024
0926486
SSE4.2 madness
AlexGuteniev Jun 24, 2024
ba63dbb
better approach
AlexGuteniev Jun 25, 2024
c293748
elegant tail
AlexGuteniev Jun 25, 2024
004d431
big needle benchmark
AlexGuteniev Jun 28, 2024
709ed47
large needle optimization
AlexGuteniev Jun 28, 2024
1c66f01
prevent found data within overflown part
AlexGuteniev Jun 29, 2024
43e0eec
proper tail length
AlexGuteniev Jun 29, 2024
1420757
better match coverage
AlexGuteniev Jun 29, 2024
fa9d52f
bring back optimization
AlexGuteniev Jun 29, 2024
dfd69e8
i consistent
AlexGuteniev Jun 29, 2024
93cdcf0
Merge branch 'main' into search
StephanTLavavej Sep 4, 2024
96a4d58
Avoid truncation warnings in `_First1 + _Count2`.
StephanTLavavej Sep 5, 2024
2a239a7
Style and comment nitpicks.
StephanTLavavej Sep 5, 2024
3bc1d56
Benchmark: Use a constexpr array of string_view.
StephanTLavavej Sep 5, 2024
c1aaba7
Add const.
StephanTLavavej Sep 5, 2024
6276567
Don't help the compiler - let it deduce `_Ty`.
StephanTLavavej Sep 5, 2024
05e435d
Drop inconsistent `_CSTD`.
StephanTLavavej Sep 5, 2024
e7ec67a
input_needle is guaranteed non-empty here.
StephanTLavavej Sep 5, 2024
9d11dcc
Avoid permanently modifying the haystack.
StephanTLavavej Sep 5, 2024
abae4ed
Bugfix: Use an unaligned load from `_First2`.
StephanTLavavej Sep 5, 2024
e96407b
`_Count2` is more natural than `_Last2`
AlexGuteniev Sep 7, 2024
e0c843d
-hiding
AlexGuteniev Sep 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
default_searcher
AlexGuteniev committed May 5, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit f2806c562fbf97cb6b3f83147c0a61422efc0aba
35 changes: 28 additions & 7 deletions benchmarks/src/search.cpp
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
#include <benchmark/benchmark.h>
#include <cstdint>
#include <cstring>
#include <functional>
#include <vector>

const char src_haystack[] =
@@ -40,7 +41,7 @@ const char src_haystack[] =

const char src_needle[] = "aliquet";

void bm_strstr(benchmark::State& state) {
void c_strstr(benchmark::State& state) {
const std::string haystack(std::begin(src_haystack), std::end(src_haystack));
const std::string needle(std::begin(src_needle), std::end(src_needle));

@@ -53,7 +54,7 @@ void bm_strstr(benchmark::State& state) {
}

template <class T>
void bm(benchmark::State& state) {
void ranges_search(benchmark::State& state) {
const std::vector<T> haystack(std::begin(src_haystack), std::end(src_haystack));
const std::vector<T> needle(std::begin(src_needle), std::end(src_needle));

@@ -65,10 +66,30 @@ void bm(benchmark::State& state) {
}
}

BENCHMARK(bm_strstr);
BENCHMARK(bm<std::uint8_t>);
BENCHMARK(bm<std::uint16_t>);
BENCHMARK(bm<std::uint32_t>);
BENCHMARK(bm<std::uint64_t>);
// Benchmark: std::search driven by a std::default_searcher, over elements of type T.
// The haystack and the pattern are element-wise copies of the src_haystack and src_needle
// character arrays; std::end on an array of char also covers the literal's trailing '\0'.
template <class T>
void search_default_searcher(benchmark::State& state) {
    const std::vector<T> hay(std::begin(src_haystack), std::end(src_haystack));
    const std::vector<T> pattern(std::begin(src_needle), std::end(src_needle));

    for (auto _ : state) {
        // Keep the optimizer from treating the inputs as compile-time-known data.
        benchmark::DoNotOptimize(hay);
        benchmark::DoNotOptimize(pattern);

        // The searcher is rebuilt on every iteration, so its construction is part of the timed work.
        const std::default_searcher searcher{pattern.begin(), pattern.end()};
        auto found = std::search(hay.begin(), hay.end(), searcher);

        // Keep the result alive so the search itself is not eliminated.
        benchmark::DoNotOptimize(found);
    }
}

// Register the c_strstr benchmark (operates on std::string copies of the source text).
BENCHMARK(c_strstr);

// Register ranges_search for 1-, 2-, 4- and 8-byte unsigned element types.
BENCHMARK(ranges_search<std::uint8_t>);
BENCHMARK(ranges_search<std::uint16_t>);
BENCHMARK(ranges_search<std::uint32_t>);
BENCHMARK(ranges_search<std::uint64_t>);

// Register search_default_searcher (std::search with std::default_searcher) for the same element types.
BENCHMARK(search_default_searcher<std::uint8_t>);
BENCHMARK(search_default_searcher<std::uint16_t>);
BENCHMARK(search_default_searcher<std::uint32_t>);
BENCHMARK(search_default_searcher<std::uint64_t>);


// Expands to the benchmark library's main() entry point for this executable.
BENCHMARK_MAIN();
2 changes: 1 addition & 1 deletion stl/inc/algorithm
Original file line number Diff line number Diff line change
@@ -2161,7 +2161,7 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays
if constexpr (is_pointer_v<decltype(_UFirst1)>) {
_UFirst1 = _Ptr_res1;
} else {
_UFirst1 += _Ptr_last1 - _Ptr_res1;
_UFirst1 += _Ptr_res1 - _Ptr1;
}

_STD _Seek_wrapped(_Last1, _UFirst1);
24 changes: 24 additions & 0 deletions stl/inc/functional
Original file line number Diff line number Diff line change
@@ -2456,6 +2456,30 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked(
_Iter_diff_t<_FwdItHaystack> _Count1 = _Last1 - _First1;
_Iter_diff_t<_FwdItPat> _Count2 = _Last2 - _First2;

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_First1);
const auto _Ptr_last1 = _STD _To_address(_Last1);

const auto _Ptr_res1 =
_Search_vectorized(_Ptr1, _Ptr_last1, _STD _To_address(_First2), _STD _To_address(_Last2));

if constexpr (is_pointer_v<_FwdItHaystack>) {
_First1 = _Ptr_res1;
} else {
_First1 += _Ptr_res1 - _Ptr1;
}

if (_First1 != _Last1) {
return {_First1, _First1 + _Count2};
} else {
return {_Last1, _Last1};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (; _Count2 <= _Count1; ++_First1, (void) --_Count1) { // room for match, try it
_FwdItHaystack _Mid1 = _First1;
for (_FwdItPat _Mid2 = _First2;; ++_Mid1, (void) ++_Mid2) {
2 changes: 1 addition & 1 deletion stl/inc/xutility
Original file line number Diff line number Diff line change
@@ -5384,7 +5384,7 @@ constexpr bool _Equal_memcmp_is_safe_helper =

template <class _Iter1, class _Iter2, class _Pr>
constexpr bool _Equal_memcmp_is_safe =
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, _Pr>;
_Equal_memcmp_is_safe_helper<remove_const_t<_Iter1>, remove_const_t<_Iter2>, remove_const_t<_Pr>>;

template <class _CtgIt1, class _CtgIt2>
_NODISCARD int _Memcmp_ranges(_CtgIt1 _First1, _CtgIt1 _Last1, _CtgIt2 _First2) {
5 changes: 5 additions & 0 deletions tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
Original file line number Diff line number Diff line change
@@ -339,6 +339,11 @@ void test_case_search(const vector<T>& input_haystack, const vector<T>& input_ne
last_known_good_search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end());
auto actual = search(input_haystack.begin(), input_haystack.end(), input_needle.begin(), input_needle.end());
assert(expected == actual);
#if _HAS_CXX17
auto searcher_actual = search(
input_haystack.begin(), input_haystack.end(), default_searcher{input_needle.begin(), input_needle.end()});
assert(expected == searcher_actual);
#endif // _HAS_CXX17
#if _HAS_CXX20
auto ranges_actual = ranges::search(input_haystack, input_needle);
assert(expected == begin(ranges_actual));