Skip to content

Commit

Permalink
Use a bitwise AND instead of shifts (microsoft#3092)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <[email protected]>
  • Loading branch information
2 people authored and CaseyCarter committed Oct 6, 2022
1 parent b57d443 commit 5774640
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
// vpermq to load left and right, and transpose the lanes
Expand All @@ -181,7 +181,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -203,7 +203,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Left = _mm256_loadu_si256(static_cast<__m256i*>(_First));
Expand All @@ -221,7 +221,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -240,7 +240,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _First, void* _Last) noexcept {
if (_Byte_length(_First, _Last) >= 64 && _Use_avx2()) {
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
const __m256i _Shuf = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
do {
_Advance_bytes(_Last, -32);
Expand All @@ -256,7 +256,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -275,7 +275,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _First, void* _Last) noexcept {
if (_Byte_length(_First, _Last) >= 64 && _Use_avx2()) {
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Left = _mm256_loadu_si256(static_cast<__m256i*>(_First));
Expand All @@ -290,7 +290,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
const void* _Stop_at = _First;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
+ _Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -313,7 +313,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -327,7 +327,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -348,7 +348,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -362,7 +362,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -380,7 +380,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
const void* _First, const void* _Last, void* _Dest) noexcept {
if (_Byte_length(_First, _Last) >= 32 && _Use_avx2()) {
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
const __m256i _Shuf = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
do {
_Advance_bytes(_Last, -32);
Expand All @@ -393,7 +393,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -411,7 +411,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
const void* _First, const void* _Last, void* _Dest) noexcept {
if (_Byte_length(_First, _Last) >= 32 && _Use_avx2()) {
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -423,7 +423,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
const void* _Stop_at = _Dest;
- _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
+ _Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand Down

0 comments on commit 5774640

Please sign in to comment.