Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
29f7aa8
Set up the new macro system.
StephanTLavavej Oct 24, 2025
9df2f9b
Inspect `_VECTORIZED_FOR_X64_X86`.
StephanTLavavej Oct 24, 2025
4efcf9d
Improve comments for vectorization type traits.
StephanTLavavej Oct 24, 2025
11a385e
_VECTORIZED_MISMATCH. (note _Lex_compare_memcmp_classify_elements)
StephanTLavavej Oct 24, 2025
9c4490b
_VECTORIZED_IS_SORTED_UNTIL.
StephanTLavavej Oct 24, 2025
44afd8c
_VECTORIZED_ROTATE.
StephanTLavavej Oct 24, 2025
d58cc6c
_VECTORIZED_SWAP_RANGES.
StephanTLavavej Oct 24, 2025
f79f444
_VECTORIZED_REVERSE.
StephanTLavavej Oct 24, 2025
6817749
_VECTORIZED_REMOVE.
StephanTLavavej Oct 24, 2025
ad78933
_VECTORIZED_INCLUDES.
StephanTLavavej Oct 24, 2025
547fd56
_VECTORIZED_MINMAX.
StephanTLavavej Oct 24, 2025
cbc0361
_VECTORIZED_MINMAX_ELEMENT. (note interaction with _VECTORIZED_MINMAX)
StephanTLavavej Oct 24, 2025
465b4c5
_VECTORIZED_COUNT.
StephanTLavavej Oct 24, 2025
a21bbae
_VECTORIZED_SEARCH.
StephanTLavavej Oct 24, 2025
649a2fd
_VECTORIZED_ADJACENT_FIND.
StephanTLavavej Oct 24, 2025
6747f12
_VECTORIZED_FIND_FIRST_OF.
StephanTLavavej Oct 24, 2025
d410936
_VECTORIZED_FIND_LAST_OF.
StephanTLavavej Oct 24, 2025
b2485ab
_VECTORIZED_FIND.
StephanTLavavej Oct 24, 2025
071af15
_VECTORIZED_SEARCH_N.
StephanTLavavej Oct 24, 2025
94f5ea6
_VECTORIZED_REMOVE_COPY.
StephanTLavavej Oct 24, 2025
d81cf8f
_VECTORIZED_FIND_LAST.
StephanTLavavej Oct 24, 2025
c2a314d
_VECTORIZED_REVERSE_COPY.
StephanTLavavej Oct 24, 2025
2e58939
_VECTORIZED_REPLACE.
StephanTLavavej Oct 24, 2025
86f8a90
_VECTORIZED_FIND_END.
StephanTLavavej Oct 24, 2025
13f8b50
_VECTORIZED_FIND_LAST_NOT_OF.
StephanTLavavej Oct 24, 2025
cbf48ae
_VECTORIZED_UNIQUE.
StephanTLavavej Oct 24, 2025
5af3ecc
_VECTORIZED_UNIQUE_COPY.
StephanTLavavej Oct 24, 2025
741b5b9
_VECTORIZED_BITSET_TO_STRING.
StephanTLavavej Oct 24, 2025
a19efcd
_VECTORIZED_BITSET_FROM_STRING.
StephanTLavavej Oct 24, 2025
834eea4
Remove/shrink _VECTORIZED_FOR_X64_X86 regions.
StephanTLavavej Oct 24, 2025
a8b6015
Guard the helper traits to finish removing _VECTORIZED_FOR_X64_X86.
StephanTLavavej Oct 24, 2025
391b33e
Define the algorithm macros.
StephanTLavavej Oct 24, 2025
c546a48
Unify _VECTORIZED_FIND_LAST_NOT_OF into _VECTORIZED_FIND_LAST.
StephanTLavavej Oct 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions stl/inc/__msvc_string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ struct _WChar_traits : private _Char_traits<_Elem, unsigned short> {
_NODISCARD static _CONSTEXPR17 int compare(_In_reads_(_Count) const _Elem* const _First1,
_In_reads_(_Count) const _Elem* const _First2, const size_t _Count) noexcept /* strengthened */ {
// compare [_First1, _First1 + _Count) with [_First2, ...)
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
if (!_STD _Is_constant_evaluated()) {
// TRANSITION, GH-2289: Use vectorized algorithms for better performance than __builtin_wmemcmp.
const size_t _Pos = _Mismatch_vectorized<sizeof(_Elem)>(_First1, _First2, _Count);
Expand All @@ -370,7 +370,7 @@ struct _WChar_traits : private _Char_traits<_Elem, unsigned short> {
return _First1[_Pos] < _First2[_Pos] ? -1 : +1;
}
}
-#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^

if constexpr (is_same_v<_Elem, wchar_t>) {
return __builtin_wmemcmp(_First1, _First2, _Count);
Expand Down
14 changes: 7 additions & 7 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI
auto _UFirst1 = _STD _Get_unwrapped(_First1);
const auto _ULast1 = _STD _Get_unwrapped(_Last1);
auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1));
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
if constexpr (_Equal_memcmp_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>);
Expand All @@ -943,7 +943,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI
return {_First1, _First2};
}
}
-#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^
while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) {
++_UFirst1;
++_UFirst2;
Expand Down Expand Up @@ -991,7 +991,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(
const _CT _Count2 = _ULast2 - _UFirst2;
const auto _Count = static_cast<_Iter_diff_t<_InIt1>>((_STD min) (_Count1, _Count2));
_ULast1 = _UFirst1 + _Count;
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
if constexpr (_Equal_memcmp_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>);
Expand All @@ -1007,7 +1007,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(
return {_First1, _First2};
}
}
-#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^
while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) {
++_UFirst1;
++_UFirst2;
Expand Down Expand Up @@ -11904,7 +11904,7 @@ namespace ranges {
}

const size_t _Num = (_STD min) (_Num1, _Num2);
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
const auto _First1_ptr = _STD to_address(_First1);
const auto _First2_ptr = _STD to_address(_First2);
const size_t _Pos = _Mismatch_vectorized<sizeof(*_First1_ptr)>(_First1_ptr, _First2_ptr, _Num);
Expand All @@ -11915,10 +11915,10 @@ namespace ranges {
} else {
return _STD invoke(_Pred, _First1_ptr[_Pos], _First2_ptr[_Pos]);
}
-#else // ^^^ _VECTORIZED_FOR_X64_X86 / !_VECTORIZED_FOR_X64_X86 vvv
+#else // ^^^ _VECTORIZED_MISMATCH / !_VECTORIZED_MISMATCH vvv
const int _Ans = _STD _Memcmp_count(_First1, _First2, _Num);
return _Memcmp_classification_pred{}(_Ans, 0) || (_Ans == 0 && _Num1 < _Num2);
-#endif // ^^^ !_VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ !_VECTORIZED_MISMATCH ^^^
}
}

Expand Down
29 changes: 17 additions & 12 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,13 @@ __declspec(noalias) uint64_t __stdcall __std_max_8u(const void* _First, const vo
__declspec(noalias) float __stdcall __std_max_f(const void* _First, const void* _Last) noexcept;
__declspec(noalias) double __stdcall __std_max_d(const void* _First, const void* _Last) noexcept;

+#if _VECTORIZED_MISMATCH
__declspec(noalias) size_t __stdcall __std_mismatch_1(const void* _First1, const void* _First2, size_t _Count) noexcept;
__declspec(noalias) size_t __stdcall __std_mismatch_2(const void* _First1, const void* _First2, size_t _Count) noexcept;
__declspec(noalias) size_t __stdcall __std_mismatch_4(const void* _First1, const void* _First2, size_t _Count) noexcept;
__declspec(noalias) size_t __stdcall __std_mismatch_8(const void* _First1, const void* _First2, size_t _Count) noexcept;
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^

} // extern "C"
#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^

Expand Down Expand Up @@ -466,7 +469,9 @@ auto _Max_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
_STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
}
}
+#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^

+#if _VECTORIZED_MISMATCH
template <size_t _Element_size>
size_t _Mismatch_vectorized(const void* const _First1, const void* const _First2, const size_t _Count) noexcept {
if constexpr (_Element_size % 8 == 0) {
Expand All @@ -479,7 +484,7 @@ size_t _Mismatch_vectorized(const void* const _First1, const void* const _First2
return __std_mismatch_1(_First1, _First2, _Count * _Element_size) / _Element_size;
}
}
-#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^

template <class _Ty>
struct _Get_first_parameter;
Expand Down Expand Up @@ -5793,7 +5798,7 @@ namespace ranges {
_NODISCARD constexpr mismatch_result<_It1, _It2> _Mismatch_n(
_It1 _First1, _It2 _First2, iter_difference_t<_It1> _Count, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) {
_STL_INTERNAL_CHECK(_Count >= 0);
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity>
&& is_same_v<_Pj2, identity>) {
if (!_STD is_constant_evaluated()) {
Expand All @@ -5806,7 +5811,7 @@ namespace ranges {
_First2 + static_cast<iter_difference_t<_It2>>(_Pos)};
}
}
-#endif // ^^^ _VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ _VECTORIZED_MISMATCH ^^^
for (; _Count != 0; ++_First1, (void) ++_First2, --_Count) {
if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) {
break;
Expand Down Expand Up @@ -5905,13 +5910,13 @@ namespace ranges {

template <class _Elem1, class _Elem2>
constexpr bool _Lex_compare_memcmp_classify_elements =
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
is_integral_v<_Elem1> && is_integral_v<_Elem2> && sizeof(_Elem1) == sizeof(_Elem2)
&& is_unsigned_v<_Elem1> == is_unsigned_v<_Elem2>;
-#else // ^^^ _VECTORIZED_FOR_X64_X86 / !_VECTORIZED_FOR_X64_X86 vvv
+#else // ^^^ _VECTORIZED_MISMATCH / !_VECTORIZED_MISMATCH vvv
conjunction_v<_Is_character_or_bool<_Elem1>, _Is_character_or_bool<_Elem2>, is_unsigned<_Elem1>,
is_unsigned<_Elem2>>;
-#endif // ^^^ !_VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ !_VECTORIZED_MISMATCH ^^^

#ifdef __cpp_lib_byte
template <>
Expand Down Expand Up @@ -5986,7 +5991,7 @@ _NODISCARD _CONSTEXPR20 bool lexicographical_compare(
const auto _Num1 = static_cast<size_t>(_ULast1 - _UFirst1);
const auto _Num2 = static_cast<size_t>(_ULast2 - _UFirst2);
const size_t _Num = (_STD min) (_Num1, _Num2);
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
const auto _First1_ptr = _STD _To_address(_UFirst1);
const auto _First2_ptr = _STD _To_address(_UFirst2);
const size_t _Pos = _Mismatch_vectorized<sizeof(*_First1_ptr)>(_First1_ptr, _First2_ptr, _Num);
Expand All @@ -5997,10 +6002,10 @@ _NODISCARD _CONSTEXPR20 bool lexicographical_compare(
} else {
return _Pred(_First1_ptr[_Pos], _First2_ptr[_Pos]);
}
-#else // ^^^ _VECTORIZED_FOR_X64_X86 / !_VECTORIZED_FOR_X64_X86 vvv
+#else // ^^^ _VECTORIZED_MISMATCH / !_VECTORIZED_MISMATCH vvv
const int _Ans = _STD _Memcmp_count(_UFirst1, _UFirst2, _Num);
return _Memcmp_pred{}(_Ans, 0) || (_Ans == 0 && _Num1 < _Num2);
-#endif // ^^^ !_VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ !_VECTORIZED_MISMATCH ^^^
}
}

Expand Down Expand Up @@ -6111,7 +6116,7 @@ _NODISCARD constexpr auto lexicographical_compare_three_way(const _InIt1 _First1
const auto _Num1 = static_cast<size_t>(_ULast1 - _UFirst1);
const auto _Num2 = static_cast<size_t>(_ULast2 - _UFirst2);
const size_t _Num = (_STD min) (_Num1, _Num2);
-#if _VECTORIZED_FOR_X64_X86
+#if _VECTORIZED_MISMATCH
const auto _First1_ptr = _STD to_address(_UFirst1);
const auto _First2_ptr = _STD to_address(_UFirst2);
const size_t _Pos = _Mismatch_vectorized<sizeof(*_First1_ptr)>(_First1_ptr, _First2_ptr, _Num);
Expand All @@ -6125,14 +6130,14 @@ _NODISCARD constexpr auto lexicographical_compare_three_way(const _InIt1 _First1
__assume(_Val1 != _Val2); // avoid one comparison
return _Comp(_Val1, _Val2);
}
-#else // ^^^ _VECTORIZED_FOR_X64_X86 / !_VECTORIZED_FOR_X64_X86 vvv
+#else // ^^^ _VECTORIZED_MISMATCH / !_VECTORIZED_MISMATCH vvv
const int _Ans = _STD _Memcmp_count(_UFirst1, _UFirst2, _Num);
if (_Ans == 0) {
return _Num1 <=> _Num2;
} else {
return _Memcmp_pred{}(_Ans, 0);
}
-#endif // ^^^ !_VECTORIZED_FOR_X64_X86 ^^^
+#endif // ^^^ !_VECTORIZED_MISMATCH ^^^
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void test_lex_compare_memcmp_classify_for_types() {
#endif // _HAS_CXX20
}

-constexpr bool vec_alg = _VECTORIZED_FOR_X64_X86;
+constexpr bool vec_alg = _VECTORIZED_MISMATCH;

template <bool Expected, class Type1, class Type2>
void test_lex_compare_memcmp_classify_for_1byte_integrals() {
Expand Down