Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a bitwise AND instead of shifts #3092

Merged
merged 3 commits into from
Sep 13, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
// vpermq to load left and right, and transpose the lanes
Expand All @@ -181,7 +181,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -203,7 +203,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Left = _mm256_loadu_si256(static_cast<__m256i*>(_First));
Expand All @@ -221,7 +221,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
if (_Byte_length(_First, _Last) >= 32 && _Use_sse42()) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -240,7 +240,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _First, void* _Last) noexcept {
if (_Byte_length(_First, _Last) >= 64 && _Use_avx2()) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
const __m256i _Shuf = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
do {
_Advance_bytes(_Last, -32);
Expand All @@ -256,7 +256,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -275,7 +275,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _First, void* _Last) noexcept {
if (_Byte_length(_First, _Last) >= 64 && _Use_avx2()) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Left = _mm256_loadu_si256(static_cast<__m256i*>(_First));
Expand All @@ -290,7 +290,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs

if (_Byte_length(_First, _Last) >= 32 && _Use_sse2()) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 4);
_Advance_bytes(_Stop_at, (_Byte_length(_First, _Last) >> 1) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Left = _mm_loadu_si128(static_cast<__m128i*>(_First));
Expand All @@ -313,7 +313,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -327,7 +327,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -348,7 +348,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -362,7 +362,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -380,7 +380,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
const void* _First, const void* _Last, void* _Dest) noexcept {
if (_Byte_length(_First, _Last) >= 32 && _Use_avx2()) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
const __m256i _Shuf = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
do {
_Advance_bytes(_Last, -32);
Expand All @@ -393,7 +393,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand All @@ -411,7 +411,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
const void* _First, const void* _Last, void* _Dest) noexcept {
if (_Byte_length(_First, _Last) >= 32 && _Use_avx2()) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0x1F});
do {
_Advance_bytes(_Last, -32);
const __m256i _Block = _mm256_loadu_si256(static_cast<const __m256i*>(_Last));
Expand All @@ -423,7 +423,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(

if (_Byte_length(_First, _Last) >= 16 && _Use_sse2()) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 4 << 4);
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) & ~size_t{0xF});
do {
_Advance_bytes(_Last, -16);
const __m128i _Block = _mm_loadu_si128(static_cast<const __m128i*>(_Last));
Expand Down