Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Floating minmax: fix negative zero handling and dedicated test coverage for arrays of +0.0 and -0.0 only #4734

Merged
merged 38 commits into from
Aug 25, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b1e4baf
Sedicated test coverage for floating minmax of +0.0 and -0.0 only
AlexGuteniev Jun 18, 2024
18cc8b1
expand test
AlexGuteniev Jun 19, 2024
fba7701
expand test with canned simple case
AlexGuteniev Jun 20, 2024
f0b43fb
Fix the bug
AlexGuteniev Jun 20, 2024
52eda32
Merge branch 'main' into float_zeros
AlexGuteniev Jun 20, 2024
4040d48
fix merge error
AlexGuteniev Jun 20, 2024
75acc88
more interesting predefined cases
AlexGuteniev Jun 20, 2024
48c23b5
Even more coverage
AlexGuteniev Jun 21, 2024
7d00070
Implement `minmax` in terms of `minmax_element`
AlexGuteniev Jun 21, 2024
4302235
Fix ascending order
AlexGuteniev Jun 21, 2024
1f124c4
tail correctness is not needed anymore
AlexGuteniev Jun 21, 2024
0a0be65
Merge remote-tracking branch 'upstream/main' into float_zeros
AlexGuteniev Jun 22, 2024
b34c73e
even better random coverage
AlexGuteniev Jun 22, 2024
893792f
Don't check zeros for fp:fast
AlexGuteniev Jun 22, 2024
ff3e15b
Restore the full optimization, it is fine for `/fp:fast`
AlexGuteniev Jun 22, 2024
8bdda8b
disable value-based floating vector algorithms for non-fast-math in h…
AlexGuteniev Jun 22, 2024
3f0b77f
/fp:fast coverage
AlexGuteniev Jun 22, 2024
b7b19d0
fix Both_val
AlexGuteniev Jun 22, 2024
1a818d6
common header
AlexGuteniev Jun 23, 2024
714be8d
Merge branch 'main' into float_zeros
StephanTLavavej Aug 19, 2024
e1a5ff7
Add `#pragma once` to new test header.
StephanTLavavej Aug 19, 2024
437454e
Include more headers.
StephanTLavavej Aug 19, 2024
684cd27
Include fewer headers.
StephanTLavavej Aug 19, 2024
206ef5c
Add `std::` qualification.
StephanTLavavej Aug 19, 2024
7a0d538
Remove `std::` qualification.
StephanTLavavej Aug 19, 2024
7891ef7
Drop unnecessary `if constexpr` suppression.
StephanTLavavej Aug 19, 2024
158a007
Take a function object instead of a function pointer.
StephanTLavavej Aug 19, 2024
1058249
Header-only functions should be `inline`.
StephanTLavavej Aug 19, 2024
1384cf8
Use consistent preprocessor guards.
StephanTLavavej Aug 19, 2024
e50cf12
Add new test to test.lst.
StephanTLavavej Aug 19, 2024
129c8d7
Fix code typo: `-1, 0` => `-1.0`
StephanTLavavej Aug 19, 2024
3ccad6c
Fix bug: `-0` => `-0.0`
StephanTLavavej Aug 19, 2024
09450f6
Add missing quotes.
StephanTLavavej Aug 20, 2024
b1fb57b
Drop /fp options instead of whole lines.
StephanTLavavej Aug 20, 2024
8d55da1
Adjust endif comment to match.
StephanTLavavej Aug 20, 2024
2e947b6
Drop duplicate comment in vector_algorithms.cpp.
StephanTLavavej Aug 20, 2024
b7c840c
Improve comments.
StephanTLavavej Aug 19, 2024
e1c4d4f
Drop /clr and /clr:pure lines.
StephanTLavavej Aug 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -10205,7 +10205,7 @@ _NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pr
_STL_ASSERT(
_Ilist.size() != 0, "An initializer_list passed to std::minmax must not be empty. (N4971 [alg.min.max]/21)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if constexpr (_Is_min_max_value_optimization_safe<const _Ty*, _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Result = _STD _Minmax_vectorized(_Ilist.begin(), _Ilist.end());
return {static_cast<_Ty>(_Result._Min), static_cast<_Ty>(_Result._Max)};
Expand Down Expand Up @@ -10334,13 +10334,21 @@ namespace ranges {

using _Vty = iter_value_t<_It>;
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe<_It, _Pr>
&& sized_sentinel_for<_Se, _It>) {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD _Minmax_vectorized(_First_ptr, _Last_ptr);
return {static_cast<_Vty>(_Result._Min), static_cast<_Vty>(_Result._Max)};
if constexpr (is_same_v<_Pj, identity> && sized_sentinel_for<_Se, _It>) {
if constexpr (_Is_min_max_value_optimization_safe<_It, _Pr>) {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD _Minmax_vectorized(_First_ptr, _Last_ptr);
return {static_cast<_Vty>(_Result._Min), static_cast<_Vty>(_Result._Max)};
}
} else if constexpr (_Is_min_max_optimization_safe<_It, _Pr>) {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD _Minmax_element_vectorized(_First_ptr, _Last_ptr);
return {*static_cast<const _Vty*>(_Result.first), *static_cast<const _Vty*>(_Result.second)};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
Expand Down
30 changes: 24 additions & 6 deletions stl/inc/xutility
Original file line number Diff line number Diff line change
Expand Up @@ -6783,6 +6783,24 @@ constexpr bool _Is_min_max_optimization_safe = // Activate the vector algorithms
#endif // _HAS_CXX20
is_same<_Pr, less<>>, is_same<_Pr, less<_Elem>>>>; // predicate is less

// The value-based vectorized rather than the position-based one does not always produce
// the expected results for floatting point types.
//
// Efficient vectorization needs to find vertical minmax first, and then the horizontal one.
// This alters order of comparison: index zero element is first compared against
// vector size equal index element and only in the end against index one element.
// With equivalent but distinguishable +0.0 and -0.0 values, the altered comparison order
// will not produce the expected result in some cases (will return +0.0 instead of -0.0 or the reverse)
//
// The result is still acceptable for /fp:fast when +0.0 / -0.0 are not expected to be properly distinguished,
// and the compiler itself takes advantage of it.
template <class _Iter, class _Pr, class _Elem = _Iter_value_t<_Iter>>
constexpr bool _Is_min_max_value_optimization_safe = // Activate the vector algorithms for ranges::min/max?
#ifndef _M_FP_FAST
!is_floating_point_v<_Elem> &&
#endif // !_M_FP_FAST
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
_Is_min_max_optimization_safe<_Iter, _Pr, _Elem>;

template <class _FwdIt, class _Pr>
constexpr _FwdIt _Max_element_unchecked(_FwdIt _First, _FwdIt _Last, _Pr _Pred) { // find largest element
#if _USE_STD_VECTOR_ALGORITHMS
Expand Down Expand Up @@ -6913,7 +6931,7 @@ _NODISCARD constexpr _Ty(max)(initializer_list<_Ty> _Ilist, _Pr _Pred) {
_STL_ASSERT(
_Ilist.size() != 0, "An initializer_list passed to std::max must not be empty. (N4971 [alg.min.max]/13)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if constexpr (_Is_min_max_value_optimization_safe<const _Ty*, _Pr>) {
if (!_Is_constant_evaluated()) {
return static_cast<_Ty>(_STD _Max_vectorized(_Ilist.begin(), _Ilist.end()));
}
Expand Down Expand Up @@ -6959,7 +6977,7 @@ namespace ranges {
_STL_ASSERT(_First != _Last,
"An initializer_list passed to std::ranges::max must not be empty. (N4971 [alg.min.max]/13)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_value_optimization_safe<const _Ty*, _Pr>) {
if (!_STD is_constant_evaluated()) {
return static_cast<_Ty>(_STD _Max_vectorized(_First, _Last));
}
Expand All @@ -6978,7 +6996,7 @@ namespace ranges {
_STL_ASSERT(
_UFirst != _ULast, "A range passed to std::ranges::max must not be empty. (N4971 [alg.min.max]/13)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe<decltype(_UFirst), _Pr>
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_value_optimization_safe<decltype(_UFirst), _Pr>
&& sized_sentinel_for<decltype(_ULast), decltype(_UFirst)>) {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_UFirst);
Expand Down Expand Up @@ -7137,7 +7155,7 @@ _NODISCARD constexpr _Ty(min)(initializer_list<_Ty> _Ilist, _Pr _Pred) {
_STL_ASSERT(
_Ilist.size() != 0, "An initializer_list passed to std::min must not be empty. (N4971 [alg.min.max]/5)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if constexpr (_Is_min_max_value_optimization_safe<const _Ty*, _Pr>) {
if (!_Is_constant_evaluated()) {
return static_cast<_Ty>(_STD _Min_vectorized(_Ilist.begin(), _Ilist.end()));
}
Expand Down Expand Up @@ -7177,7 +7195,7 @@ namespace ranges {
_STL_ASSERT(_First != _Last,
"An initializer_list passed to std::ranges::min must not be empty. (N4971 [alg.min.max]/5)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_value_optimization_safe<const _Ty*, _Pr>) {
if (!_STD is_constant_evaluated()) {
return static_cast<_Ty>(_STD _Min_vectorized(_First, _Last));
}
Expand All @@ -7196,7 +7214,7 @@ namespace ranges {
_STL_ASSERT(
_UFirst != _ULast, "A range passed to std::ranges::min must not be empty. (N4971 [alg.min.max]/5)");
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe<decltype(_UFirst), _Pr>
if constexpr (is_same_v<_Pj, identity> && _Is_min_max_value_optimization_safe<decltype(_UFirst), _Pr>
&& sized_sentinel_for<decltype(_ULast), decltype(_UFirst)>) {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_UFirst);
Expand Down
44 changes: 28 additions & 16 deletions stl/src/vector_algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,8 +1415,8 @@ namespace {
template <class _Fn>
static __m128 _H_func(const __m128 _Cur, _Fn _Funct) noexcept {
__m128 _H_min_val = _Cur;
_H_min_val = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2)));
_H_min_val = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1)));
_H_min_val = _Funct(_mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1)), _H_min_val);
_H_min_val = _Funct(_mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2)), _H_min_val);
return _H_min_val;
}

Expand Down Expand Up @@ -1457,11 +1457,11 @@ namespace {
}

static __m128 _Min(const __m128 _First, const __m128 _Second, __m128 = _mm_undefined_ps()) noexcept {
return _mm_min_ps(_First, _Second);
return _mm_min_ps(_Second, _First);
}

static __m128 _Max(const __m128 _First, const __m128 _Second, __m128 = _mm_undefined_ps()) noexcept {
return _mm_max_ps(_First, _Second);
return _mm_max_ps(_Second, _First);
}

static __m128i _Mask_cast(const __m128 _Mask) noexcept {
Expand All @@ -1485,9 +1485,9 @@ namespace {
template <class _Fn>
static __m256 _H_func(const __m256 _Cur, _Fn _Funct) noexcept {
__m256 _H_min_val = _Cur;
_H_min_val = _Funct(_H_min_val, _mm256_permute2f128_ps(_H_min_val, _mm256_undefined_ps(), 0x01));
_H_min_val = _Funct(_H_min_val, _mm256_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2)));
_H_min_val = _Funct(_H_min_val, _mm256_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1)));
_H_min_val = _Funct(_mm256_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1)), _H_min_val);
_H_min_val = _Funct(_mm256_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2)), _H_min_val);
_H_min_val = _Funct(_mm256_permute2f128_ps(_H_min_val, _mm256_undefined_ps(), 0x01), _H_min_val);
return _H_min_val;
}

Expand Down Expand Up @@ -1528,11 +1528,11 @@ namespace {
}

static __m256 _Min(const __m256 _First, const __m256 _Second, __m256 = _mm256_undefined_ps()) noexcept {
return _mm256_min_ps(_First, _Second);
return _mm256_min_ps(_Second, _First);
}

static __m256 _Max(const __m256 _First, const __m256 _Second, __m256 = _mm256_undefined_ps()) noexcept {
return _mm256_max_ps(_First, _Second);
return _mm256_max_ps(_Second, _First);
}

static __m256i _Mask_cast(const __m256 _Mask) noexcept {
Expand Down Expand Up @@ -1575,7 +1575,7 @@ namespace {
template <class _Fn>
static __m128d _H_func(const __m128d _Cur, _Fn _Funct) noexcept {
__m128d _H_min_val = _Cur;
_H_min_val = _Funct(_H_min_val, _mm_shuffle_pd(_H_min_val, _H_min_val, 1));
_H_min_val = _Funct(_mm_shuffle_pd(_H_min_val, _H_min_val, 1), _H_min_val);
return _H_min_val;
}

Expand Down Expand Up @@ -1615,11 +1615,11 @@ namespace {
}

static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d = _mm_undefined_pd()) noexcept {
return _mm_min_pd(_First, _Second);
return _mm_min_pd(_Second, _First);
}

static __m128d _Max(const __m128d _First, const __m128d _Second, __m128d = _mm_undefined_pd()) noexcept {
return _mm_max_pd(_First, _Second);
return _mm_max_pd(_Second, _First);
}

static __m128i _Mask_cast(const __m128d _Mask) noexcept {
Expand All @@ -1643,8 +1643,8 @@ namespace {
template <class _Fn>
static __m256d _H_func(const __m256d _Cur, _Fn _Funct) noexcept {
__m256d _H_min_val = _Cur;
_H_min_val = _Funct(_H_min_val, _mm256_permute4x64_pd(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2)));
_H_min_val = _Funct(_H_min_val, _mm256_shuffle_pd(_H_min_val, _H_min_val, 0b0101));
_H_min_val = _Funct(_mm256_shuffle_pd(_H_min_val, _H_min_val, 0b0101), _H_min_val);
_H_min_val = _Funct(_mm256_permute4x64_pd(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2)), _H_min_val);
return _H_min_val;
}

Expand Down Expand Up @@ -1685,11 +1685,11 @@ namespace {
}

static __m256d _Min(const __m256d _First, const __m256d _Second, __m256d = _mm256_undefined_pd()) noexcept {
return _mm256_min_pd(_First, _Second);
return _mm256_min_pd(_Second, _First);
}

static __m256d _Max(const __m256d _First, const __m256d _Second, __m256d = _mm256_undefined_pd()) noexcept {
return _mm256_max_pd(_First, _Second);
return _mm256_max_pd(_Second, _First);
}

static __m256i _Mask_cast(const __m256d _Mask) noexcept {
Expand Down Expand Up @@ -1982,6 +1982,18 @@ namespace {

template <_Min_max_mode _Mode, class _Traits, bool _Sign>
auto __std_minmax_impl(const void* _First, const void* const _Last) noexcept {
// The value-based vectorized rather than the position-based one does not always produce
// the expected results for floatting point types.
//
// Efficient vectorization needs to find vertical minmax first, and then the horizontal one.
// This alters order of comparison: index zero element is first compared against
// vector size equal index element and only in the end against index one element.
// With equivalent but distinguishable +0.0 and -0.0 values, the altered comparison order
// will not produce the expected result in some cases (will return +0.0 instead of -0.0 or the reverse)
//
// The result is still acceptable for /fp:fast when +0.0 / -0.0 are not expected to be properly distinguished,
// and the compiler itself takes advantage of it.
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

using _Ty = std::conditional_t<_Sign, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>;

_Ty _Cur_min_val; // initialized in both of the branches below
Expand Down
12 changes: 12 additions & 0 deletions tests/std/include/test_min_max_element_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <type_traits>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -116,6 +117,17 @@ void test_case_min_max_element(const std::vector<T>& input) {
assert(*expected_max == actual_max_value);
assert(*expected_minmax.first == actual_minmax_value.min);
assert(*expected_minmax.second == actual_minmax_value.max);

#ifndef _M_FP_FAST
// With /fp:fast mode the compiler does not try to produce the code that correctly
// distincts +0.0 and -0.0, so the algorithms are not expected to either.
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
if constexpr (std::is_floating_point_v<T>) {
assert(signbit(*expected_min) == signbit(actual_min_value));
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
assert(signbit(*expected_max) == signbit(actual_max_value));
assert(signbit(*expected_minmax.first) == signbit(actual_minmax_value.min));
assert(signbit(*expected_minmax.second) == signbit(actual_minmax_value.max));
}
#endif // !defined(_M_FP_FAST)
}
#endif // _HAS_CXX20
}
62 changes: 62 additions & 0 deletions tests/std/include/test_vector_algorithms_support.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
#include <isa_availability.h>
#include <random>
#include <vector>
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

#pragma warning(disable : 4984) // 'if constexpr' is a C++17 language extension
#ifdef __clang__
#pragma clang diagnostic ignored "-Wc++17-extensions" // constexpr if is a C++17 extension
#endif // __clang__
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved

void initialize_randomness(std::mt19937_64& gen) {
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
constexpr size_t n = std::mt19937_64::state_size;
constexpr size_t w = std::mt19937_64::word_size;
static_assert(w % 32 == 0, "w should be evenly divisible by 32");
constexpr size_t k = w / 32;

std::vector<uint32_t> vec(n * k);

std::random_device rd;
std::generate(vec.begin(), vec.end(), ref(rd));

printf("This is a randomized test.\n");
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
printf("DO NOT IGNORE/RERUN ANY FAILURES.\n");
printf("You must report them to the STL maintainers.\n\n");

printf("Seed vector: ");
for (const auto& e : vec) {
printf("%u,", e);
}
printf("\n");

std::seed_seq seq(vec.cbegin(), vec.cend());
gen.seed(seq);
}

#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE)
extern "C" long __isa_enabled;

void disable_instructions(ISA_AVAILABILITY isa) {
__isa_enabled &= ~(1UL << static_cast<unsigned long>(isa));
}
#endif // (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE)

constexpr size_t dataCount = 1024;

void run_randomized_tests_with_different_isa_levels(void tests(std::mt19937_64& gen)) {
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
std::mt19937_64 gen;
initialize_randomness(gen);

tests(gen);
#ifndef _M_CEE_PURE
#if defined(_M_IX86) || defined(_M_X64)
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
disable_instructions(__ISA_AVAILABLE_AVX2);
tests(gen);

disable_instructions(__ISA_AVAILABLE_SSE42);
tests(gen);
#endif // defined(_M_IX86) || defined(_M_X64)
#endif // _M_CEE_PURE
}
Loading