Skip to content

Commit

Permalink
Revert vectorized implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
StephanTLavavej committed Jun 10, 2024
1 parent 95ba820 commit 72a0d29
Show file tree
Hide file tree
Showing 4 changed files with 0 additions and 298 deletions.
19 changes: 0 additions & 19 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -2150,25 +2150,6 @@ _NODISCARD _CONSTEXPR20 _FwdItHaystack search(_FwdItHaystack _First1, _FwdItHays
if constexpr (_Is_ranges_random_iter_v<_FwdItHaystack> && _Is_ranges_random_iter_v<_FwdItPat>) {
const _Iter_diff_t<_FwdItPat> _Count2 = _ULast2 - _UFirst2;
if (_ULast1 - _UFirst1 >= _Count2) {
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<decltype(_UFirst1), decltype(_UFirst2), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_UFirst1);

const auto _Ptr_res1 = _STD _Search_vectorized(
_Ptr1, _STD _To_address(_ULast1), _STD _To_address(_UFirst2), _STD _To_address(_ULast2));

if constexpr (is_pointer_v<decltype(_UFirst1)>) {
_UFirst1 = _Ptr_res1;
} else {
_UFirst1 += _Ptr_res1 - _Ptr1;
}

_STD _Seek_wrapped(_Last1, _UFirst1);
return _Last1;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
const auto _Last_possible = _ULast1 - static_cast<_Iter_diff_t<_FwdItHaystack>>(_Count2);
for (;; ++_UFirst1) {
if (_STD _Equal_rev_pred_unchecked(_UFirst1, _UFirst2, _ULast2, _STD _Pass_fn(_Pred))) {
Expand Down
23 changes: 0 additions & 23 deletions stl/inc/functional
Original file line number Diff line number Diff line change
Expand Up @@ -2459,29 +2459,6 @@ _CONSTEXPR20 pair<_FwdItHaystack, _FwdItHaystack> _Search_pair_unchecked(
_Iter_diff_t<_FwdItHaystack> _Count1 = _Last1 - _First1;
_Iter_diff_t<_FwdItPat> _Count2 = _Last2 - _First2;

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<_FwdItHaystack, _FwdItPat, _Pred_eq>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Ptr1 = _STD _To_address(_First1);

const auto _Ptr_res1 = _STD _Search_vectorized(
_Ptr1, _STD _To_address(_Last1), _STD _To_address(_First2), _STD _To_address(_Last2));

if constexpr (is_pointer_v<_FwdItHaystack>) {
_First1 = _Ptr_res1;
} else {
_First1 += _Ptr_res1 - _Ptr1;
}

if (_First1 != _Last1) {
return {_First1, _First1 + _Count2};
} else {
return {_Last1, _Last1};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

for (; _Count2 <= _Count1; ++_First1, (void) --_Count1) { // room for match, try it
_FwdItHaystack _Mid1 = _First1;
for (_FwdItPat _Mid2 = _First2;; ++_Mid1, (void) ++_Mid2) {
Expand Down
65 changes: 0 additions & 65 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,6 @@ const void* __stdcall __std_find_trivial_2(const void* _First, const void* _Last
const void* __stdcall __std_find_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
const void* __stdcall __std_find_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;

// Separately compiled search routines, one per element size in bytes (1, 2, 4, 8).
// Each takes the haystack as [_First1, _Last1) and the needle as [_First2, _Last2), both type-erased to
// const void*, and returns a type-erased pointer; _Search_vectorized selects among them by sizeof and casts
// the result back to the haystack's element type.
const void* __stdcall __std_search_1(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_2(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_4(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
const void* __stdcall __std_search_8(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;

const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
Expand Down Expand Up @@ -204,22 +195,6 @@ _Ty* _Find_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noe
}
}

// Typed front end for the __std_search_N routines: picks the routine matching the element size, passes the
// haystack [_First1, _Last1) and needle [_First2, _Last2) through as untyped pointers, and converts the
// untyped result back to _Ty1*. Both element types must have the same size.
template <class _Ty1, class _Ty2>
_Ty1* _Search_vectorized(_Ty1* const _First1, _Ty1* const _Last1, _Ty2* const _First2, _Ty2* const _Last2) noexcept {
    _STL_INTERNAL_STATIC_ASSERT(sizeof(_Ty1) == sizeof(_Ty2));

    using _Const_elem_ptr = const _Ty1*;
    constexpr auto _Elem_size = sizeof(_Ty1);

    if constexpr (_Elem_size == 1) {
        const void* const _Hit = ::__std_search_1(_First1, _Last1, _First2, _Last2);
        return const_cast<_Ty1*>(static_cast<_Const_elem_ptr>(_Hit));
    } else if constexpr (_Elem_size == 2) {
        const void* const _Hit = ::__std_search_2(_First1, _Last1, _First2, _Last2);
        return const_cast<_Ty1*>(static_cast<_Const_elem_ptr>(_Hit));
    } else if constexpr (_Elem_size == 4) {
        const void* const _Hit = ::__std_search_4(_First1, _Last1, _First2, _Last2);
        return const_cast<_Ty1*>(static_cast<_Const_elem_ptr>(_Hit));
    } else if constexpr (_Elem_size == 8) {
        const void* const _Hit = ::__std_search_8(_First1, _Last1, _First2, _Last2);
        return const_cast<_Ty1*>(static_cast<_Const_elem_ptr>(_Hit));
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
    }
}

template <class _Ty>
_Ty* _Min_element_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
constexpr bool _Signed = is_signed_v<_Ty>;
Expand Down Expand Up @@ -6769,46 +6744,6 @@ namespace ranges {
_STL_INTERNAL_CHECK(_RANGES distance(_First1, _Last1) == _Count1);
_STL_INTERNAL_CHECK(_RANGES distance(_First2, _Last2) == _Count2);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity>
&& is_same_v<_Pj2, identity>) {
if (!_STD is_constant_evaluated()) {
const auto _Ptr1 = _STD to_address(_First1);
const auto _Ptr2 = _STD to_address(_First2);
remove_const_t<decltype(_Ptr1)> _Ptr_last1;
remove_const_t<decltype(_Ptr2)> _Ptr_last2;

if constexpr (is_same_v<_It1, _Se1>) {
_Ptr_last1 = _STD to_address(_Last1);
} else {
_Ptr_last1 = _Ptr1 + _Count1;
}

if constexpr (is_same_v<_It2, _Se2>) {
_Ptr_last2 = _STD to_address(_Last2);
} else {
_Ptr_last2 = _Ptr2 + _Count2;
}

const auto _Ptr_res1 = _STD _Search_vectorized(_Ptr1, _Ptr_last1, _Ptr2, _Ptr_last2);

if constexpr (is_pointer_v<_It1>) {
if (_Ptr_res1 != _Ptr_last1) {
return {_Ptr_res1, _Ptr_res1 + _Count2};
} else {
return {_Ptr_res1, _Ptr_res1};
}
} else {
_First1 += _Ptr_res1 - _Ptr1;
if (_First1 != _Last1) {
return {_First1, _First1 + _Count2};
} else {
return {_First1, _First1};
}
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
for (; _Count1 >= _Count2; ++_First1, (void) --_Count1) {
auto _Match_and_mid1 = _RANGES _Equal_rev_pred(_First1, _First2, _Last2, _Pred, _Proj1, _Proj2);
if (_Match_and_mid1.first) {
Expand Down
191 changes: 0 additions & 191 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2636,177 +2636,6 @@ namespace {

return _Result;
}

#ifndef _M_ARM64EC
template <class _Ty>
bool _Equal_avx2(const void* _First1, const void* _First2, size_t _Size) noexcept {
// no need for DevCom-10331414 workaround; this function is called only from AVX2 path

// preconditions: non-zero length needle, first is already equal
_Advance_bytes(_First1, sizeof(_Ty));
_Advance_bytes(_First2, sizeof(_Ty));
_Size -= sizeof(_Ty);

const void* _Stop1 = _First1;
_Advance_bytes(_Stop1, _Size & ~size_t{0x1F});

while (_First1 != _Stop1) {
const __m256i _Data1 = _mm256_loadu_si256(static_cast<const __m256i*>(_First1));
const __m256i _Data2 = _mm256_loadu_si256(static_cast<const __m256i*>(_First2));
const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2);
if (!_mm256_testz_si256(_Eq, _Eq)) {
return false;
}

_Advance_bytes(_First1, 32);
_Advance_bytes(_First2, 32);
}

if (const size_t _Avx_tail_size = _Size & 0x1C; _Avx_tail_size != 0) {
const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2);
const __m256i _Data1 = _mm256_maskload_epi32(static_cast<const int*>(_First1), _Tail_mask);
const __m256i _Data2 = _mm256_maskload_epi32(static_cast<const int*>(_First2), _Tail_mask);
const __m256i _Eq = _mm256_xor_si256(_Data1, _Data2);
if (!_mm256_testz_si256(_Eq, _Eq)) {
return false;
}

_Advance_bytes(_First1, _Avx_tail_size);
_Advance_bytes(_First2, _Avx_tail_size);
}

if constexpr (sizeof(_Ty) <= 2) {
const void* _Stop1_final_tail = _First1;
_Advance_bytes(_Stop1_final_tail, _Size & 0x3);

while (_First1 != _Stop1_final_tail) {
if (*static_cast<const _Ty*>(_First1) != *static_cast<const _Ty*>(_First2)) {
return false;
}
_Advance_bytes(_First1, sizeof(_Ty));
_Advance_bytes(_First2, sizeof(_Ty));
}
}

return true;
}
#endif // !defined(_M_ARM64EC)

// Finds the first occurrence of the needle [_First2, _Last2) within the haystack [_First1, _Last1), where both
// ranges are arrays of _Ty whose elements are compared for equality.
//
// _Traits supplies the AVX2 helpers used for _Ty: _Set_avx (broadcast one element) and _Cmp_avx (compare).
//
// Returns:
//   * _First1 when the needle is empty,
//   * a pointer to the first element of the first match when one exists,
//   * _Last1 when the needle is longer than the haystack or does not occur in it.
template <class _Traits, class _Ty>
const void* __stdcall __std_search_impl(
    const void* _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept {
    const size_t _Size_bytes_2 = _Byte_length(_First2, _Last2);

    if (_Size_bytes_2 == 0) {
        return _First1; // an empty needle matches at the very beginning
    }

    if (_Size_bytes_2 == sizeof(_Ty)) {
        // a one-element needle is a plain find
        return __std_find_trivial_impl<_Traits, _Ty>(_First1, _Last1, *static_cast<const _Ty*>(_First2));
    }

    const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1);
    if (_Size_bytes_1 < _Size_bytes_2) {
        return _Last1; // the needle cannot fit
    }

    // size in bytes of the haystack prefix in which a match may begin
    const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2 + sizeof(_Ty);

#ifndef _M_ARM64EC
    if (_Use_avx2()) {
        _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414

        // _mm256_movemask_epi8 reports one bit per byte, so an element that compares equal contributes
        // sizeof(_Ty) adjacent bits (this assumes _Traits::_Cmp_avx yields a per-element all-ones/all-zeros
        // mask, as the _mm256_cmpeq_* family does). Only the bit of each element's first byte denotes a
        // position where a match can begin. Without this filter, clearing one bit after a failed candidate
        // would leave the other bits of the same element set, and the loop would go on to test byte offsets
        // in the middle of an element: _Equal_avx2 assumes the first element already matched, so that can
        // report a misaligned false match and read past the end of the haystack.
        constexpr unsigned int _Elem_start_bits = 0xFFFFFFFFu / ((1u << sizeof(_Ty)) - 1u);

        const __m256i _Comparand = _Traits::_Set_avx(*static_cast<const _Ty*>(_First2));
        const void* _Stop1       = _First1;
        _Advance_bytes(_Stop1, _Max_pos & ~size_t{0x1F});

        // whole 32-byte vectors of candidate positions
        while (_First1 != _Stop1) {
            const __m256i _Data = _mm256_loadu_si256(static_cast<const __m256i*>(_First1));
            long _Bingo         = static_cast<long>(
                static_cast<unsigned int>(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)))
                & _Elem_start_bits);

            // each set bit is a position whose first element equals the needle's first element
            while (_Bingo != 0) {
                const unsigned long _Offset = _tzcnt_u32(_Bingo);

                const void* _Match1 = _First1;
                _Advance_bytes(_Match1, _Offset);

                if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) {
                    return _Match1;
                }

                _bittestandreset(&_Bingo, _Offset);
            }

            _Advance_bytes(_First1, 32);
        }

        // remaining whole dwords of candidate positions, loaded under a mask
        if (const size_t _Avx_tail_size = _Max_pos & 0x1C; _Avx_tail_size != 0) {
            const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size >> 2);
            const __m256i _Data      = _mm256_maskload_epi32(static_cast<const int*>(_First1), _Tail_mask);
            // masked-out lanes are ANDed away so they cannot produce candidates
            long _Bingo = static_cast<long>(
                static_cast<unsigned int>(
                    _mm256_movemask_epi8(_mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask)))
                & _Elem_start_bits);

            while (_Bingo != 0) {
                const unsigned long _Offset = _tzcnt_u32(_Bingo);

                const void* _Match1 = _First1;
                _Advance_bytes(_Match1, _Offset);

                if (_Equal_avx2<_Ty>(_Match1, _First2, _Size_bytes_2)) {
                    return _Match1;
                }

                _bittestandreset(&_Bingo, _Offset);
            }

            _Advance_bytes(_First1, _Avx_tail_size);
        }

        // only 1- and 2-byte elements can leave fewer than 4 bytes of candidate positions
        if constexpr (sizeof(_Ty) <= 2) {
            const void* _Stop1_final_tail = _First1;
            _Advance_bytes(_Stop1_final_tail, _Max_pos & 0x3);

            while (_First1 != _Stop1_final_tail) {
                if (*static_cast<const _Ty*>(_First1) == *static_cast<const _Ty*>(_First2)) {
                    if (_Equal_avx2<_Ty>(_First1, _First2, _Size_bytes_2)) {
                        return _First1;
                    }
                }

                _Advance_bytes(_First1, sizeof(_Ty));
            }
        }

        return _Last1;
    } else
#endif // !defined(_M_ARM64EC)
    {
        // scalar fallback: try every candidate position in turn
        auto _Ptr1           = static_cast<const _Ty*>(_First1);
        const auto _Ptr2     = static_cast<const _Ty*>(_First2);
        const size_t _Count2 = _Size_bytes_2 / sizeof(_Ty);
        const void* _Stop1   = _Ptr1;
        _Advance_bytes(_Stop1, _Max_pos);

        for (; _Ptr1 != _Stop1; ++_Ptr1) {
            if (*_Ptr1 != *_Ptr2) {
                continue;
            }

            bool _Equal = true;

            // the first element is known equal; compare the rest of the needle
            for (size_t _Idx = 1; _Idx != _Count2; ++_Idx) {
                if (_Ptr1[_Idx] != _Ptr2[_Idx]) {
                    _Equal = false;
                    break;
                }
            }

            if (_Equal) {
                return _Ptr1;
            }
        }
        return _Last1;
    }
}
} // unnamed namespace

extern "C" {
Expand Down Expand Up @@ -2912,26 +2741,6 @@ const void* __stdcall __std_find_first_of_trivial_8(
return __std_find_first_of::_Impl_4_8<__std_find_first_of::_Traits_8>(_First1, _Last1, _First2, _Last2);
}

// Search entry point for 1-byte elements: forwards to __std_search_impl with _Find_traits_1 and uint8_t.
const void* __stdcall __std_search_1(
const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept {
return __std_search_impl<_Find_traits_1, uint8_t>(_First1, _Last1, _First2, _Last2);
}

// Search entry point for 2-byte elements: forwards to __std_search_impl with _Find_traits_2 and uint16_t.
const void* __stdcall __std_search_2(
const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept {
return __std_search_impl<_Find_traits_2, uint16_t>(_First1, _Last1, _First2, _Last2);
}

// Search entry point for 4-byte elements: forwards to __std_search_impl with _Find_traits_4 and uint32_t.
const void* __stdcall __std_search_4(
const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept {
return __std_search_impl<_Find_traits_4, uint32_t>(_First1, _Last1, _First2, _Last2);
}

// Search entry point for 8-byte elements: forwards to __std_search_impl with _Find_traits_8 and uint64_t.
const void* __stdcall __std_search_8(
const void* const _First1, const void* const _Last1, const void* const _First2, const void* const _Last2) noexcept {
return __std_search_impl<_Find_traits_8, uint64_t>(_First1, _Last1, _First2, _Last2);
}

__declspec(noalias) size_t
__stdcall __std_mismatch_1(const void* const _First1, const void* const _First2, const size_t _Count) noexcept {
return __std_mismatch_impl<_Find_traits_1, uint8_t>(_First1, _First2, _Count);
Expand Down

0 comments on commit 72a0d29

Please sign in to comment.