Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge main to feature/mdspan2, include <limits> #3637

Merged
Merged
Changes from 1 commit
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
f6dadc0
Some Cpp Core Guidelines warning fixes (#2116)
AlexGuteniev Mar 30, 2023
f1206d8
P2374R4: `views::cartesian_product` (#3561)
JMazurkiewicz Mar 30, 2023
9231abe
Python cleanups (#3598)
StephanTLavavej Mar 30, 2023
5d4aa49
Revert the ppltasks change that introduced an `ole32.dll` dependency …
StephanTLavavej Apr 4, 2023
8a652e6
Document import library (#2141)
AlexGuteniev Apr 7, 2023
10f0c3a
Build the import lib with `_ENFORCE_ONLY_CORE_HEADERS` (#3621)
StephanTLavavej Apr 7, 2023
0461a50
`type_index::operator<=>` should not call the comparison function twi…
frederick-vs-ja Apr 7, 2023
97f5698
`<ranges>`: Explicitly specify the template parameters for `tuple` (#…
cpplearner Apr 7, 2023
b37ff31
Testing: Check new C++23 CPOs (#3610)
JMazurkiewicz Apr 7, 2023
adaf68c
Testing: Check `c(begin|end)` members of C++20 ranges (#3612)
JMazurkiewicz Apr 7, 2023
cb86d7e
Fix silent bad codegen for vectorized `meow_element()` above 4 GB (#3…
StephanTLavavej Apr 7, 2023
d494511
Don't include `<xmemory>` in `<optional>` and `<variant>` (#3624)
frederick-vs-ja Apr 7, 2023
2df667e
Move `_Char_traits_eq` and `_Char_traits_lt` from `<xstring>` to `<re…
frederick-vs-ja Apr 7, 2023
7eeef47
Don't include `<algorithm>` in `<chrono>` (#3626)
frederick-vs-ja Apr 7, 2023
b331f8d
Don't include `<bit>` in `<compare>` (#3627)
frederick-vs-ja Apr 7, 2023
e6a12f7
`vector_algorithms.cpp`: Add `vzeroupper`, so that it is there even in `/Od` (#3630)
AlexGuteniev Apr 7, 2023
46124c7
Merge branch 'main' into unlimited-mdspan
StephanTLavavej Apr 7, 2023
34fce78
`<mdspan>` needs `<limits>` for `numeric_limits`.
StephanTLavavej Apr 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
`vector_algorithms.cpp`: Add `vzeroupper`, so that it is there even in `/Od` (#3630)

Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
AlexGuteniev and StephanTLavavej authored Apr 7, 2023
commit e6a12f7e12306cf0d6f256f70777e0107993feec
37 changes: 37 additions & 0 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
@@ -61,6 +61,19 @@ namespace {
// Moves _Target by _Offset bytes (the offset may be negative), updating the pointer in place.
// The arithmetic is done through a byte-typed view because arithmetic on void* is not allowed.
void _Advance_bytes(const void*& _Target, ptrdiff_t _Offset) noexcept {
    const auto _As_bytes = static_cast<const unsigned char*>(_Target);
    _Target              = _As_bytes + _Offset;
}

// TRANSITION, DevCom-10331414
// Guard object whose destructor executes _mm256_zeroupper(), so the instruction is issued on
// every path that leaves the scope in which the guard is declared.
// Copying is disabled so that each guard corresponds to exactly one scope.
struct [[nodiscard]] _Zeroupper_on_exit {
    _Zeroupper_on_exit() = default;

    ~_Zeroupper_on_exit() noexcept {
        _mm256_zeroupper();
    }

    _Zeroupper_on_exit(const _Zeroupper_on_exit&)            = delete;
    _Zeroupper_on_exit& operator=(const _Zeroupper_on_exit&) = delete;
};

} // unnamed namespace

extern "C" {
@@ -84,6 +97,8 @@ __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
_Advance_bytes(_First1, 32);
_Advance_bytes(_First2, 32);
} while (_First1 != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

constexpr size_t _Mask_16 = ~((static_cast<size_t>(1) << 4) - 1);
@@ -169,6 +184,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
@@ -209,6 +226,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
@@ -245,6 +264,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
@@ -279,6 +300,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
_mm256_storeu_si256(static_cast<__m256i*>(_Last), _Left_reversed);
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
@@ -315,6 +338,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
@@ -350,6 +375,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
@@ -382,6 +409,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
@@ -412,6 +441,8 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
_mm256_storeu_si256(static_cast<__m256i*>(_Dest), _Block_reversed);
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
@@ -1197,6 +1228,8 @@ namespace {
template <class _Traits, class _Ty>
const void* __stdcall __std_find_trivial_unsized(const void* _First, const _Ty _Val) noexcept {
if (_Use_avx2()) {
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414

// We read by vector-sized pieces, and we align pointers to vector-sized boundary.
// From start partial piece we mask out matches that don't belong to the range.
// This makes sure we never cross page boundary, thus we read 'as if' sequentially.
@@ -1279,6 +1312,8 @@ namespace {

const size_t _Avx_size = _Size_bytes & ~size_t{0x1F};
if (_Avx_size != 0 && _Use_avx2()) {
_Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414

const __m256i _Comparand = _Traits::_Set_avx(_Val);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Avx_size);
@@ -1338,6 +1373,8 @@ namespace {
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
_Size_bytes &= 0x1F;

_mm256_zeroupper(); // TRANSITION, DevCom-10331414
}

const size_t _Sse_size = _Size_bytes & ~size_t{0xF};