From 9e8e9122be602c3c26e5c629b8a1d0de52f76ca2 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 19:41:25 +0200 Subject: [PATCH 01/15] test --- .../VSO_0000000_vector_algorithms/test.cpp | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 27ba7340b3f..7dbc8f89d2f 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -103,6 +103,67 @@ void test_adjacent_difference_with_heterogeneous_types() { assert(output == expected); } +template +InIt last_known_good_adj_find(InIt first, InIt last) { + if (first == last) { + return last; + } + + auto next = first; + for (++next; next != last; ++first, ++next) { + if (*first == *next) { + return first; + } + } + + return last; +} + +template +void test_case_adj_find(const vector& input) { + const auto actual = adjacent_find(input.begin(), input.end()); + const auto expected = last_known_good_adj_find(input.begin(), input.end()); + assert(actual == expected); + +#if _HAS_CXX20 + auto actual_r = ranges::adjacent_find(input); + assert(actual_r == expected); +#endif // _HAS_CXX20 +} + +template +void test_adjacent_find(mt19937_64& gen) { + constexpr size_t replicaCount = 4; + + using Limits = numeric_limits; + + uniform_int_distribution> dis(Limits::min(), Limits::max()); + + vector master_input; + vector input; + + master_input.reserve(dataCount); + input.reserve(dataCount); + + test_case_adj_find(input); + for (size_t attempts = 0; attempts < dataCount; ++attempts) { + master_input.push_back(static_cast(dis(gen))); + input = master_input; + + test_case_adj_find(input); + + if (master_input.size() > 2) { + uniform_int_distribution pos_dis(0, master_input.size() - 2); + + for (size_t replicas = 0; replicas < replicaCount; ++replicas) { + size_t replica_pos = pos_dis(gen); + input[replica_pos] = input[replica_pos + 1]; + test_case_adj_find(input); + } + } + } +} + template ptrdiff_t last_known_good_count(FwdIt first, FwdIt last, T v) { ptrdiff_t result = 0; @@ -761,6 +822,16 @@ void test_vector_algorithms(mt19937_64& gen) { test_adjacent_difference_with_heterogeneous_types(); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_adjacent_find(gen); + test_count(gen); test_count(gen); test_count(gen); From 8ed4ff38563149abf84b336f4781805ef8491622 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 20:09:24 +0200 Subject: [PATCH 02/15] benchmark --- benchmarks/CMakeLists.txt | 1 + benchmarks/src/adjacent_find.cpp | 42 ++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 benchmarks/src/adjacent_find.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 4e4f47894ed..b8f5ffbaf51 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -107,6 +107,7 @@ function(add_benchmark name) endfunction() add_benchmark(adjacent_difference src/adjacent_difference.cpp) +add_benchmark(adjacent_find src/adjacent_find.cpp) add_benchmark(bitset_from_string src/bitset_from_string.cpp) add_benchmark(bitset_to_string src/bitset_to_string.cpp) add_benchmark(efficient_nonlocking_print src/efficient_nonlocking_print.cpp) diff --git a/benchmarks/src/adjacent_find.cpp b/benchmarks/src/adjacent_find.cpp new file mode 100644 index 00000000000..abf78239f11 --- /dev/null +++ b/benchmarks/src/adjacent_find.cpp @@ -0,0 +1,42 @@ +// Copyright(c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include + +using namespace std; + +template +void bm(benchmark::State& state) { + const size_t size = static_cast(state.range(0)); + const size_t pos = static_cast(state.range(1)); + + vector v(size); + + for (size_t i = 0; i != size; ++i) { + v[i] = static_cast(i & 3); + } + + if (pos == 0 || pos >= size) { + abort(); + } + + v[pos] = v[pos - 1]; + + for (auto _ : state) { + benchmark::DoNotOptimize(v); + benchmark::DoNotOptimize(adjacent_find(v.begin(), v.end())); + } +} + +void common_args(auto bm) { + bm->ArgPair(2525, 1142); +} + +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); + +BENCHMARK_MAIN(); From 9be8b884b6d3079cd8bfc2857df0466006425189 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 20:44:48 +0200 Subject: [PATCH 03/15] vectorization --- stl/inc/algorithm | 22 +++++-- stl/inc/xutility | 20 +++++++ stl/src/vector_algorithms.cpp | 104 ++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 4 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 0bfef44a07b..a12717adc57 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -542,10 +542,24 @@ _NODISCARD _CONSTEXPR20 _FwdIt adjacent_find(const _FwdIt _First, _FwdIt _Last, auto _UFirst = _STD _Get_unwrapped(_First); auto _ULast = _STD _Get_unwrapped(_Last); if (_UFirst != _ULast) { - for (auto _UNext = _UFirst; ++_UNext != _ULast; _UFirst = _UNext) { - if (_Pred(*_UFirst, *_UNext)) { - _ULast = _UFirst; - break; +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe<_FwdIt, _FwdIt, _Pr>) { + const auto _First_ptr = _STD _To_address(_UFirst); + const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _STD _To_address(_ULast)); + + if constexpr (is_pointer_v) { + _ULast = _Result; + } else { + _ULast = _UFirst + (_Result - _First_ptr); + } + } else +#endif // _USE_STD_VECTOR_ALGORITHMS + { + for (auto _UNext = _UFirst; ++_UNext != _ULast; _UFirst = _UNext) { + if (_Pred(*_UFirst, *_UNext)) { + _ULast = _UFirst; + break; + } } } } diff --git a/stl/inc/xutility b/stl/inc/xutility index 966a1d66036..bebd586e270 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -98,6 +98,11 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; +const void* __stdcall __std_adjacent_find_1(const void* _First, const void* _Last) noexcept; +const void* __stdcall __std_adjacent_find_2(const void* _First, const void* _Last) noexcept; +const void* __stdcall __std_adjacent_find_4(const void* _First, const void* _Last) noexcept; +const void* __stdcall __std_adjacent_find_8(const void* _First, const void* _Last) noexcept; + const void* __stdcall __std_search_1( const void* _First1, const void* _Last1, const void* _First2, size_t _Count2) noexcept; const void* __stdcall __std_search_2( @@ -240,6 +245,21 @@ _Ty* _Find_last_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val } } +template +_Ty* _Adjacent_find_vectorized(_Ty* const _First, _Ty* const _Last) noexcept { + if constexpr (sizeof(_Ty) == 1) { + return const_cast<_Ty*>(static_cast(::__std_adjacent_find_1(_First, _Last))); + } else if constexpr (sizeof(_Ty) == 2) { + return const_cast<_Ty*>(static_cast(::__std_adjacent_find_2(_First, _Last))); + } else if constexpr (sizeof(_Ty) == 4) { + return const_cast<_Ty*>(static_cast(::__std_adjacent_find_4(_First, _Last))); + } else if constexpr (sizeof(_Ty) == 8) { + return const_cast<_Ty*>(static_cast(::__std_adjacent_find_8(_First, _Last))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size + } +} + // find_first_of vectorization is likely to be a win after this size (in elements) _INLINE_VAR constexpr ptrdiff_t _Threshold_find_first_of = 16; diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 41c2daac2f6..bd084ec5b45 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2705,6 +2705,94 @@ namespace { } } + template + const void* __stdcall __std_adjacent_find_impl(const void* _First, const void* const _Last) noexcept { + if (_First == _Last) { + return _Last; + } + + const size_t _Size_bytes = _Byte_length(_First, _Last) - sizeof(_Ty); + + if (const size_t _Avx_size = _Size_bytes & ~size_t{0x1F}; _Avx_size != 0 && _Use_avx2()) { + _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 + + const void* _Stop_at = _First; + _Advance_bytes(_Stop_at, _Avx_size); + + do { + const void* _Next = _First; + _Advance_bytes(_Next, sizeof(_Ty)); + + const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); + const __m256i _Comparand = _mm256_loadu_si256(static_cast(_Next)); + const int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + + if (_Bingo != 0) { + const unsigned long _Offset = _tzcnt_u32(_Bingo); + _Advance_bytes(_First, _Offset); + return _First; + } + + _Advance_bytes(_First, 32); + } while (_First != _Stop_at); + + if (const size_t _Avx_tail_size = _Size_bytes & 0x1C; _Avx_tail_size != 0) { + const void* _Next = _First; + _Advance_bytes(_Next, sizeof(_Ty)); + + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); + const __m256i _Comparand = _mm256_maskload_epi32(static_cast(_Next), _Tail_mask); + const int _Bingo = + _mm256_movemask_epi8(_mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask)); + + if (_Bingo != 0) { + const unsigned long _Offset = _tzcnt_u32(_Bingo); + _Advance_bytes(_First, _Offset); + return _First; + } + + _Advance_bytes(_First, _Avx_tail_size); + } + + if constexpr (sizeof(_Ty) >= 4) { + return _Last; + } + } else if (const size_t _Sse_size = _Size_bytes & ~size_t{0xF}; _Sse_size != 0 && _Use_sse42()) { + const void* _Stop_at = _First; + _Advance_bytes(_Stop_at, _Sse_size); + + do { + const void* _Next = _First; + _Advance_bytes(_Next, sizeof(_Ty)); + + const __m128i _Data = _mm_loadu_si128(static_cast(_First)); + const __m128i _Comparand = _mm_loadu_si128(static_cast(_Next)); + const int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + + if (_Bingo != 0) { + unsigned long _Offset; + // CodeQL [SM02313] _Offset is always initialized: we just tested `if (_Bingo != 0)`. + _BitScanForward(&_Offset, _Bingo); + _Advance_bytes(_First, _Offset); + return _First; + } + + _Advance_bytes(_First, 16); + } while (_First != _Stop_at); + } + + auto _Ptr = static_cast(_First); + auto _Next = _Ptr + 1; + for (; _Next != _Last; ++_Ptr, ++_Next) { + if (*_Ptr == *_Next) { + return _Ptr; + } + } + + return _Last; + } + struct _Count_traits_8 : _Find_traits_8 { #ifndef _M_ARM64EC static __m256i _Sub_avx(const __m256i _Lhs, const __m256i _Rhs) noexcept { @@ -4545,6 +4633,22 @@ const void* __stdcall __std_find_last_trivial_8( return __std_find_last_trivial_impl<_Find_traits_8>(_First, _Last, _Val); } +const void* __stdcall __std_adjacent_find_1(const void* const _First, const void* const _Last) noexcept { + return __std_adjacent_find_impl<_Find_traits_1, uint8_t>(_First, _Last); +} + +const void* __stdcall __std_adjacent_find_2(const void* const _First, const void* const _Last) noexcept { + return __std_adjacent_find_impl<_Find_traits_2, uint16_t>(_First, _Last); +} + +const void* __stdcall __std_adjacent_find_4(const void* const _First, const void* const _Last) noexcept { + return __std_adjacent_find_impl<_Find_traits_4, uint32_t>(_First, _Last); +} + +const void* __stdcall __std_adjacent_find_8(const void* const _First, const void* const _Last) noexcept { + return __std_adjacent_find_impl<_Find_traits_8, uint64_t>(_First, _Last); +} + __declspec(noalias) size_t __stdcall __std_count_trivial_1( const void* const _First, const void* const _Last, const uint8_t _Val) noexcept { return __std_count_trivial_impl<_Count_traits_1>(_First, _Last, _Val); From 9d67a5c988d7e39e0f7ffef7dfdc37fe5f698333 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 21:12:13 +0200 Subject: [PATCH 04/15] ranges benchmark --- benchmarks/src/adjacent_find.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/benchmarks/src/adjacent_find.cpp b/benchmarks/src/adjacent_find.cpp index abf78239f11..d6027d127e5 100644 --- a/benchmarks/src/adjacent_find.cpp +++ b/benchmarks/src/adjacent_find.cpp @@ -7,7 +7,9 @@ using namespace std; -template +enum class AlgType { Std, Rng }; + +template void bm(benchmark::State& state) { const size_t size = static_cast(state.range(0)); const size_t pos = static_cast(state.range(1)); @@ -26,7 +28,11 @@ void bm(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(v); - benchmark::DoNotOptimize(adjacent_find(v.begin(), v.end())); + if constexpr (Alg == AlgType::Std) { + benchmark::DoNotOptimize(adjacent_find(v.begin(), v.end())); + } else { + benchmark::DoNotOptimize(ranges::adjacent_find(v)); + } } } @@ -34,9 +40,14 @@ void common_args(auto bm) { bm->ArgPair(2525, 1142); } -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); + +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); BENCHMARK_MAIN(); From c6d131653886efa153d423a4edc6a25770f3f893 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 21:23:33 +0200 Subject: [PATCH 05/15] vectorize ranges --- stl/inc/xutility | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index bebd586e270..df8ce4e5f9e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6803,13 +6803,30 @@ namespace ranges { return _First; } - for (auto _Next = _First;; ++_First) { - if (++_Next == _Last) { - return _Next; +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe<_It, _It, _Pr> && sized_sentinel_for<_Se, _It> + && is_same_v<_Pj, identity>) { + const auto _First_ptr = _STD _To_address(_First); + const auto _Last_ptr = _First_ptr + (_Last - _First); + + const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _Last_ptr); + if constexpr (is_pointer_v<_It>) { + return _Result; + } else { + return _First + (_Result - _First_ptr); } + } else +#endif // _USE_STD_VECTOR_ALGORITHMS + { + for (auto _Next = _First;; ++_First) { - if (_STD invoke(_Pred, _STD invoke(_Proj, *_First), _STD invoke(_Proj, *_Next))) { - return _First; + if (++_Next == _Last) { + return _Next; + } + + if (_STD invoke(_Pred, _STD invoke(_Proj, *_First), _STD invoke(_Proj, *_Next))) { + return _First; + } } } } From 87391035a9bef6694084a8a46bbde402b600e09a Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 11 Mar 2025 22:06:44 +0200 Subject: [PATCH 06/15] constexpr --- stl/inc/algorithm | 30 +++++++++++++++++------------- stl/inc/xutility | 33 +++++++++++++++++---------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index a12717adc57..d92ab38c3c6 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -544,22 +544,26 @@ _NODISCARD _CONSTEXPR20 _FwdIt adjacent_find(const _FwdIt _First, _FwdIt _Last, if (_UFirst != _ULast) { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe<_FwdIt, _FwdIt, _Pr>) { - const auto _First_ptr = _STD _To_address(_UFirst); - const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _STD _To_address(_ULast)); + if (!_STD _Is_constant_evaluated()) { + const auto _First_ptr = _STD _To_address(_UFirst); + const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _STD _To_address(_ULast)); - if constexpr (is_pointer_v) { - _ULast = _Result; - } else { - _ULast = _UFirst + (_Result - _First_ptr); + if constexpr (is_pointer_v) { + _ULast = _Result; + } else { + _ULast = _UFirst + (_Result - _First_ptr); + } + + _STD _Seek_wrapped(_Last, _ULast); + return _Last; } - } else + } #endif // _USE_STD_VECTOR_ALGORITHMS - { - for (auto _UNext = _UFirst; ++_UNext != _ULast; _UFirst = _UNext) { - if (_Pred(*_UFirst, *_UNext)) { - _ULast = _UFirst; - break; - } + + for (auto _UNext = _UFirst; ++_UNext != _ULast; _UFirst = _UNext) { + if (_Pred(*_UFirst, *_UNext)) { + _ULast = _UFirst; + break; } } } diff --git a/stl/inc/xutility b/stl/inc/xutility index df8ce4e5f9e..a263b6ba07b 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6806,27 +6806,28 @@ namespace ranges { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe<_It, _It, _Pr> && sized_sentinel_for<_Se, _It> && is_same_v<_Pj, identity>) { - const auto _First_ptr = _STD _To_address(_First); - const auto _Last_ptr = _First_ptr + (_Last - _First); + if (!_STD is_constant_evaluated()) { + const auto _First_ptr = _STD _To_address(_First); + const auto _Last_ptr = _First_ptr + (_Last - _First); - const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _Last_ptr); - if constexpr (is_pointer_v<_It>) { - return _Result; - } else { - return _First + (_Result - _First_ptr); + const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _Last_ptr); + if constexpr (is_pointer_v<_It>) { + return _Result; + } else { + return _First + (_Result - _First_ptr); + } } - } else + } #endif // _USE_STD_VECTOR_ALGORITHMS - { - for (auto _Next = _First;; ++_First) { - if (++_Next == _Last) { - return _Next; - } + for (auto _Next = _First;; ++_First) { - if (_STD invoke(_Pred, _STD invoke(_Proj, *_First), _STD invoke(_Proj, *_Next))) { - return _First; - } + if (++_Next == _Last) { + return _Next; + } + + if (_STD invoke(_Pred, _STD invoke(_Proj, *_First), _STD invoke(_Proj, *_Next))) { + return _First; } } } From 878f06c6c408a5ec4268f5b9f7c5ad97eca9ac77 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 13:47:06 -0700 Subject: [PATCH 07/15] Remove stray newline. --- stl/inc/xutility | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 765fd28782b..222054fefaf 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6824,7 +6824,6 @@ namespace ranges { #endif // _USE_STD_VECTOR_ALGORITHMS for (auto _Next = _First;; ++_First) { - if (++_Next == _Last) { return _Next; } From d53c9057e9dfce4f2d98319e8a179c7e1fa23c09 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 13:49:24 -0700 Subject: [PATCH 08/15] Fix banner inconsistency. --- benchmarks/src/adjacent_find.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/adjacent_find.cpp b/benchmarks/src/adjacent_find.cpp index d6027d127e5..6abb82ff914 100644 --- a/benchmarks/src/adjacent_find.cpp +++ b/benchmarks/src/adjacent_find.cpp @@ -1,4 +1,4 @@ -// Copyright(c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include From cdfb667e064e048dd1669104f9843ffe52023825 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:11:21 -0700 Subject: [PATCH 09/15] Include more headers. --- benchmarks/src/adjacent_find.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/src/adjacent_find.cpp b/benchmarks/src/adjacent_find.cpp index 6abb82ff914..500bdcfa696 100644 --- a/benchmarks/src/adjacent_find.cpp +++ b/benchmarks/src/adjacent_find.cpp @@ -3,6 +3,8 @@ #include #include +#include +#include #include using namespace std; From 266cbd2bd8ef9748781a012b87ec59ca875c4e01 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:17:50 -0700 Subject: [PATCH 10/15] Use cstdint for clarity. --- benchmarks/src/adjacent_find.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/benchmarks/src/adjacent_find.cpp b/benchmarks/src/adjacent_find.cpp index 500bdcfa696..67036be67bf 100644 --- a/benchmarks/src/adjacent_find.cpp +++ b/benchmarks/src/adjacent_find.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -42,14 +43,14 @@ void common_args(auto bm) { bm->ArgPair(2525, 1142); } -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); -BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); +BENCHMARK(bm)->Apply(common_args); BENCHMARK_MAIN(); From aeae87b54f68fd827c0e7846e8eb75512d77c04c Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:29:06 -0700 Subject: [PATCH 11/15] `_Equal_memcmp_is_safe` must inspect unwrapped iterators. --- stl/inc/algorithm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 10f029aa75a..f4f7b097c88 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -543,7 +543,7 @@ _NODISCARD _CONSTEXPR20 _FwdIt adjacent_find(const _FwdIt _First, _FwdIt _Last, auto _ULast = _STD _Get_unwrapped(_Last); if (_UFirst != _ULast) { #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe<_FwdIt, _FwdIt, _Pr>) { + if constexpr (_Equal_memcmp_is_safe) { if (!_STD _Is_constant_evaluated()) { const auto _First_ptr = _STD _To_address(_UFirst); const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _STD _To_address(_ULast)); From 5407a61bd1e2c06a4a94c1d6b1fe50c324aa2d95 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:44:40 -0700 Subject: [PATCH 12/15] Rename to original_input. --- .../std/tests/VSO_0000000_vector_algorithms/test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 7551a3e028b..fee5910d78b 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -139,21 +139,21 @@ void test_adjacent_find(mt19937_64& gen) { uniform_int_distribution> dis(Limits::min(), Limits::max()); - vector master_input; + vector original_input; vector input; - master_input.reserve(dataCount); + original_input.reserve(dataCount); input.reserve(dataCount); test_case_adj_find(input); for (size_t attempts = 0; attempts < dataCount; ++attempts) { - master_input.push_back(static_cast(dis(gen))); - input = master_input; + original_input.push_back(static_cast(dis(gen))); + input = original_input; test_case_adj_find(input); - if (master_input.size() > 2) { - uniform_int_distribution pos_dis(0, master_input.size() - 2); + if (original_input.size() > 2) { + uniform_int_distribution pos_dis(0, original_input.size() - 2); for (size_t replicas = 0; replicas < replicaCount; ++replicas) { size_t replica_pos = pos_dis(gen); From d3b0e337d97f01c72d7b427ba19836dbe83fb092 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:51:21 -0700 Subject: [PATCH 13/15] InIt => FwdIt. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index fee5910d78b..8960e0cbfc9 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -103,8 +103,8 @@ void test_adjacent_difference_with_heterogeneous_types() { assert(output == expected); } -template -InIt last_known_good_adj_find(InIt first, InIt last) { +template +FwdIt last_known_good_adj_find(FwdIt first, FwdIt last) { if (first == last) { return last; } From e0a9ec8d03882f3243d3ca92d82fc51d50867e2b Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Mon, 17 Mar 2025 14:46:37 -0700 Subject: [PATCH 14/15] Add const. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 8960e0cbfc9..ab906648084 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -126,7 +126,7 @@ void test_case_adj_find(const vector& input) { assert(actual == expected); #if _HAS_CXX20 - auto actual_r = ranges::adjacent_find(input); + const auto actual_r = ranges::adjacent_find(input); assert(actual_r == expected); #endif // _HAS_CXX20 } @@ -156,8 +156,8 @@ void test_adjacent_find(mt19937_64& gen) { uniform_int_distribution pos_dis(0, original_input.size() - 2); for (size_t replicas = 0; replicas < replicaCount; ++replicas) { - size_t replica_pos = pos_dis(gen); - input[replica_pos] = input[replica_pos + 1]; + const size_t replica_pos = pos_dis(gen); + input[replica_pos] = input[replica_pos + 1]; test_case_adj_find(input); } } From 4222a060b1ff4a13eae7b9caef7067f721fd00d2 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sat, 22 Mar 2025 06:09:26 -0700 Subject: [PATCH 15/15] Add missing `_M_ARM64EC` guards. --- stl/src/vector_algorithms.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index bd084ec5b45..229b58fb279 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2711,6 +2711,7 @@ namespace { return _Last; } +#ifndef _M_ARM64EC const size_t _Size_bytes = _Byte_length(_First, _Last) - sizeof(_Ty); if (const size_t _Avx_size = _Size_bytes & ~size_t{0x1F}; _Avx_size != 0 && _Use_avx2()) { @@ -2781,6 +2782,7 @@ namespace { _Advance_bytes(_First, 16); } while (_First != _Stop_at); } +#endif // !_M_ARM64EC auto _Ptr = static_cast(_First); auto _Next = _Ptr + 1;