Skip to content

Commit 9959929

Browse files
statementreplycbezaultStephanTLavavej
authored
<complex>: Improve numerical accuracy of sqrt and log (#935)
* Fix undue overflow and underflow in complex sqrt Modifies the scale factors in `_Fabs` (used by `sqrt`) such that: - `_Fabs` doesn't underflow when the input is tiny. - `sqrt` doesn't overflow when the input is huge. * Improve accuracy of `log` when |z| is close to 1 When |z| is close to 1, compute log(|z|) as log1p(norm_minus_1(z)) / 2, where norm_minus_1(z) = real(z) ^ 2 + imag(z) ^ 2 - 1 computed with double width arithmetic to avoid catastrophic cancellation. * Fix log(complex{1, tiny}) incorrectly returning -0 under FE_DOWNWARD Co-authored-by: Curtis J Bezault <[email protected]> Co-authored-by: Stephan T. Lavavej <[email protected]>
1 parent 51ccd93 commit 9959929

File tree

13 files changed

+1425
-45
lines changed

13 files changed

+1425
-45
lines changed

stl/inc/complex

+366-44
Large diffs are not rendered by default.

stl/inc/xutility

+28-1
Original file line numberDiff line numberDiff line change
@@ -6131,16 +6131,20 @@ struct _Float_traits {
61316131
// traits for double and long double:
61326132
using type = unsigned long long;
61336133

6134+
static constexpr type _Sign_mask = 0x8000'0000'0000'0000ULL;
61346135
static constexpr type _Magnitude_mask = 0x7fff'ffff'ffff'ffffULL;
61356136
static constexpr type _Exponent_mask = 0x7ff0'0000'0000'0000ULL;
6137+
static constexpr type _Quiet_nan_mask = 0x0008'0000'0000'0000ULL;
61366138
};
61376139

61386140
template <>
61396141
struct _Float_traits<float> {
61406142
using type = unsigned int;
61416143

6144+
static constexpr type _Sign_mask = 0x8000'0000U;
61426145
static constexpr type _Magnitude_mask = 0x7fff'ffffU;
61436146
static constexpr type _Exponent_mask = 0x7f80'0000U;
6147+
static constexpr type _Quiet_nan_mask = 0x0040'0000U;
61446148
};
61456149

61466150
// FUNCTION TEMPLATE _Float_abs_bits
@@ -6156,12 +6160,36 @@ _NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_abs(const _Ty _Xx) { // constexpr floa
61566160
return _Bit_cast<_Ty>(_Float_abs_bits(_Xx));
61576161
}
61586162

6163+
// FUNCTION TEMPLATE _Float_copysign
6164+
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
6165+
_NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_copysign(const _Ty _Magnitude, const _Ty _Sign) { // constexpr copysign()
6166+
const auto _Signbit = _Bit_cast<typename _Float_traits<_Ty>::type>(_Sign) & _Float_traits<_Ty>::_Sign_mask;
6167+
return _Bit_cast<_Ty>(_Float_abs_bits(_Magnitude) | _Signbit);
6168+
}
6169+
61596170
// FUNCTION TEMPLATE _Is_nan
61606171
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
61616172
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan()
61626173
return _Float_abs_bits(_Xx) > _Float_traits<_Ty>::_Exponent_mask;
61636174
}
61646175

6176+
// FUNCTION TEMPLATE _Is_signaling_nan
6177+
// TRANSITION, workaround x86 ABI
6178+
// On x86 ABI, floating point by-value arguments and return values are passed in 80-bit x87 registers.
6179+
// When the value is a 32-bit or 64-bit signaling NaN, the conversion to/from 80-bit raises FE_INVALID
6180+
// and turns it into a quiet NaN. This behavior is undesirable if we want to test for signaling NaNs.
6181+
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
6182+
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN
6183+
const auto _Abs_bits = _Float_abs_bits(_Xx);
6184+
return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0);
6185+
}
6186+
6187+
// FUNCTION TEMPLATE _Is_inf
6188+
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
6189+
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_inf(const _Ty _Xx) { // constexpr isinf()
6190+
return _Float_abs_bits(_Xx) == _Float_traits<_Ty>::_Exponent_mask;
6191+
}
6192+
61656193
// FUNCTION TEMPLATE _Is_finite
61666194
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
61676195
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite()
@@ -6177,7 +6205,6 @@ struct _Nontrivial_dummy_type {
61776205
_STL_INTERNAL_STATIC_ASSERT(!is_trivially_default_constructible_v<_Nontrivial_dummy_type>);
61786206

61796207
_STD_END
6180-
#undef _CONSTEXPR_BIT_CAST
61816208
#pragma pop_macro("new")
61826209
_STL_RESTORE_CLANG_WARNINGS
61836210
#pragma warning(pop)

tests/std/include/fenv_prefix.hpp

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#pragma once
5+
6+
#ifdef FP_CONFIG_PRESET
7+
#if FP_CONFIG_PRESET == 3
8+
#define FP_PRESET_FAST 1
9+
#else // ^^^ FP_CONFIG_PRESET == 3 / FP_CONFIG_PRESET != 3 vvv
10+
#define FP_PRESET_FAST 0
11+
#endif // ^^^ FP_CONFIG_PRESET != 3 ^^^
12+
#endif // defined(FP_CONFIG_PRESET)
13+
14+
#ifdef FP_CONTRACT_MODE
15+
#ifdef __clang__
16+
17+
#if FP_CONTRACT_MODE == 0
18+
#pragma STDC FP_CONTRACT OFF
19+
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
20+
#pragma STDC FP_CONTRACT ON
21+
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
22+
#pragma STDC FP_CONTRACT ON
23+
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
24+
#error invalid FP_CONTRACT_MODE
25+
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
26+
27+
#else // ^^^ clang / MSVC vvv
28+
29+
#if FP_CONTRACT_MODE == 0
30+
#pragma fp_contract(off)
31+
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
32+
#pragma fp_contract(on)
33+
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
34+
#pragma fp_contract(on)
35+
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
36+
#error invalid FP_CONTRACT_MODE
37+
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
38+
39+
#endif // ^^^ MSVC ^^^
40+
#endif // defined(FP_CONTRACT_MODE)
41+
42+
#include <cassert>
43+
#include <float.h>
44+
45+
struct fenv_initializer_t {
46+
fenv_initializer_t() {
47+
#if WITH_FP_ABRUPT_UNDERFLOW
48+
{
49+
const errno_t result = _controlfp_s(nullptr, _DN_FLUSH, _MCW_DN);
50+
assert(result == 0);
51+
}
52+
#endif // WITH_FP_ABRUPT_UNDERFLOW
53+
}
54+
55+
~fenv_initializer_t() = default;
56+
57+
fenv_initializer_t(const fenv_initializer_t&) = delete;
58+
fenv_initializer_t& operator=(const fenv_initializer_t&) = delete;
59+
};
60+
61+
const fenv_initializer_t fenv_initializer{};

tests/std/test.lst

+1
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ tests\GH_000625_vector_bool_optimization
161161
tests\GH_000685_condition_variable_any
162162
tests\GH_000690_overaligned_function
163163
tests\GH_000890_pow_template
164+
tests\GH_000935_complex_numerical_accuracy
164165
tests\GH_000940_missing_valarray_copy
165166
tests\GH_001001_random_rejection_rounding
166167
tests\GH_001010_filesystem_error_encoding
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
RUNALL_INCLUDE ..\floating_point_model_matrix.lst
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#pragma once
5+
6+
#include <cassert>
7+
#include <cfenv>
8+
#include <cmath>
9+
#include <float.h>
10+
#include <type_traits>
11+
#include <xutility>
12+
13+
namespace fputil {
14+
template <typename T>
15+
using float_bits_t = typename _STD _Float_traits<T>::type;
16+
17+
template <typename T>
18+
_INLINE_VAR constexpr float_bits_t<T> magnitude_mask_v = _STD _Float_traits<T>::_Magnitude_mask;
19+
20+
template <typename T>
21+
_INLINE_VAR constexpr float_bits_t<T> exponent_mask_v = _STD _Float_traits<T>::_Exponent_mask;
22+
23+
template <typename T>
24+
_INLINE_VAR constexpr float_bits_t<T> significand_mask_v = magnitude_mask_v<T> & ~exponent_mask_v<T>;
25+
26+
template <typename T>
27+
_INLINE_VAR constexpr float_bits_t<T> sign_mask_v = _STD _Float_traits<T>::_Sign_mask;
28+
29+
template <typename T>
30+
_INLINE_VAR constexpr float_bits_t<T> norm_min_bits_v = significand_mask_v<T> + 1U;
31+
32+
template <typename T>
33+
_INLINE_VAR constexpr float_bits_t<T> norm_max_bits_v = exponent_mask_v<T> - 1U;
34+
35+
template <typename T>
36+
_INLINE_VAR constexpr float_bits_t<T> infinity_bits_v = exponent_mask_v<T>;
37+
38+
// not affected by abrupt underflow
39+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
40+
constexpr bool iszero(const T& x) {
41+
return _STD _Float_abs_bits(x) == 0;
42+
}
43+
44+
// not affected by /fp:fast
45+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
46+
constexpr bool signbit(const T& x) {
47+
const auto bits = std::_Bit_cast<float_bits_t<T>>(x);
48+
return (bits & sign_mask_v<T>) != 0;
49+
}
50+
51+
enum class rounding_mode {
52+
to_nearest_ties_even = FE_TONEAREST,
53+
toward_zero = FE_TOWARDZERO,
54+
toward_positive = FE_UPWARD,
55+
toward_negative = FE_DOWNWARD,
56+
};
57+
58+
bool is_directed_rounding_mode(const rounding_mode mode) {
59+
switch (mode) {
60+
case rounding_mode::to_nearest_ties_even:
61+
return false;
62+
63+
case rounding_mode::toward_zero:
64+
case rounding_mode::toward_positive:
65+
case rounding_mode::toward_negative:
66+
return true;
67+
68+
default:
69+
assert(false);
70+
return false;
71+
}
72+
}
73+
74+
#if TEST_FP_ROUNDING
75+
76+
#ifdef __clang__
77+
// TRANSITION, should be #pragma STDC FENV_ACCESS ON
78+
#else // ^^^ clang / MSVC vvv
79+
// TRANSITION, VSO-923474 -- should be #pragma STDC FENV_ACCESS ON
80+
#pragma fenv_access(on)
81+
#endif // ^^^ MSVC ^^^
82+
83+
constexpr rounding_mode all_rounding_modes[] = {
84+
rounding_mode::to_nearest_ties_even,
85+
rounding_mode::toward_zero,
86+
rounding_mode::toward_positive,
87+
rounding_mode::toward_negative,
88+
};
89+
90+
class rounding_guard {
91+
public:
92+
explicit rounding_guard(const rounding_mode mode) : old_mode{static_cast<rounding_mode>(std::fegetround())} {
93+
const int result = std::fesetround(static_cast<int>(mode));
94+
assert(result == 0);
95+
}
96+
97+
~rounding_guard() {
98+
const int result = std::fesetround(static_cast<int>(old_mode));
99+
assert(result == 0);
100+
}
101+
102+
rounding_guard(const rounding_guard&) = delete;
103+
rounding_guard& operator=(const rounding_guard&) = delete;
104+
105+
private:
106+
rounding_mode old_mode;
107+
};
108+
109+
#else // ^^^ alternative rounding modes / default rounding mode only vvv
110+
111+
constexpr rounding_mode all_rounding_modes[] = {rounding_mode::to_nearest_ties_even};
112+
113+
class rounding_guard {
114+
public:
115+
explicit rounding_guard(const rounding_mode mode) {
116+
static_cast<void>(mode);
117+
}
118+
119+
~rounding_guard() = default;
120+
121+
rounding_guard(const rounding_guard&) = delete;
122+
rounding_guard& operator=(const rounding_guard&) = delete;
123+
};
124+
125+
#endif // ^^^ default rounding mode only ^^^
126+
127+
// compares whether two floating point values are equal
128+
// all NaNs are equal, +0.0 and -0.0 are not equal
129+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
130+
bool precise_equal(const T& actual, const T& expected) {
131+
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
132+
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
133+
} else {
134+
return actual == expected && fputil::signbit(actual) == fputil::signbit(expected);
135+
}
136+
}
137+
138+
namespace detail {
139+
// 0x80...00 = zero, 0x80...01 = numeric_limits<T>::denorm_min(), 0x7f...ff = -numeric_limits<T>::denorm_min()
140+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
141+
float_bits_t<T> offset_representation(const T& x) {
142+
const float_bits_t<T> abs_bits = _STD _Float_abs_bits(x);
143+
return fputil::signbit(x) ? sign_mask_v<T> - abs_bits : sign_mask_v<T> + abs_bits;
144+
}
145+
146+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
147+
float_bits_t<T> is_offset_value_subnormal_or_zero(const float_bits_t<T> offset_value) {
148+
constexpr float_bits_t<T> positive_norm_min_offset = sign_mask_v<T> + norm_min_bits_v<T>;
149+
constexpr float_bits_t<T> negative_norm_min_offset = sign_mask_v<T> - norm_min_bits_v<T>;
150+
151+
return negative_norm_min_offset < offset_value && offset_value < positive_norm_min_offset;
152+
}
153+
154+
// number of ulps above zero, if we count [0, numeric_limits<T>::min()) as 1 ulp
155+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
156+
double abrupt_underflow_ulp(const float_bits_t<T> offset_value) {
157+
using bits_type = float_bits_t<T>;
158+
159+
constexpr bits_type offset_positive_norm_min = sign_mask_v<T> + norm_min_bits_v<T>;
160+
constexpr bits_type offset_negative_norm_min = sign_mask_v<T> - norm_min_bits_v<T>;
161+
162+
if (offset_value >= offset_positive_norm_min) {
163+
return 1.0 + (offset_value - offset_positive_norm_min);
164+
} else if (offset_value <= offset_negative_norm_min) {
165+
return -1.0 - (offset_negative_norm_min - offset_value);
166+
} else if (offset_value >= sign_mask_v<T>) {
167+
return static_cast<double>(offset_value - sign_mask_v<T>) / norm_min_bits_v<T>;
168+
} else {
169+
return -static_cast<double>(sign_mask_v<T> - offset_value) / norm_min_bits_v<T>;
170+
}
171+
}
172+
173+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
174+
bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) {
175+
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
176+
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
177+
}
178+
179+
if (_STD _Is_inf(expected)) {
180+
return actual == expected;
181+
}
182+
183+
if (fputil::signbit(actual) != fputil::signbit(expected)) {
184+
return false;
185+
}
186+
187+
using bits_type = float_bits_t<T>;
188+
189+
// compute ulp difference
190+
const bits_type actual_offset = detail::offset_representation(actual);
191+
const bits_type expected_offset = detail::offset_representation(expected);
192+
const bits_type ulp_diff =
193+
actual_offset < expected_offset ? expected_offset - actual_offset : actual_offset - expected_offset;
194+
195+
if (ulp_diff <= static_cast<unsigned int>(ulp_tolerance) && ulp_tolerance >= 0) {
196+
return true;
197+
}
198+
199+
#if WITH_FP_ABRUPT_UNDERFLOW
200+
// handle abrupt underflow
201+
if (detail::is_offset_value_subnormal_or_zero<T>(expected_offset)
202+
|| detail::is_offset_value_subnormal_or_zero<T>(actual_offset)) {
203+
const double adjusted_actual_ulp = detail::abrupt_underflow_ulp<T>(actual_offset);
204+
const double adjusted_expected_ulp = detail::abrupt_underflow_ulp<T>(expected_offset);
205+
const double adjusted_ulp_diff = std::abs(adjusted_actual_ulp - adjusted_expected_ulp);
206+
207+
if (adjusted_ulp_diff <= ulp_tolerance) {
208+
return true;
209+
}
210+
}
211+
#endif // WITH_FP_ABRUPT_UNDERFLOW
212+
213+
return false;
214+
}
215+
216+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
217+
bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) {
218+
return _STD _Is_finite(actual) && _STD _Is_finite(expected)
219+
&& std::abs(actual - expected) <= absolute_tolerance;
220+
}
221+
} // namespace detail
222+
223+
// returns whether floating point result is nearly equal to the expected value
224+
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
225+
bool near_equal(
226+
const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) {
227+
if (precise_equal(actual, expected)) {
228+
return true;
229+
}
230+
231+
if (ulp_tolerance > 0 && detail::is_within_ulp_tolerance(actual, expected, ulp_tolerance)) {
232+
return true;
233+
}
234+
235+
if (absolute_tolerance > 0 && detail::is_within_absolute_tolerance(actual, expected, absolute_tolerance)) {
236+
return true;
237+
}
238+
239+
return false;
240+
}
241+
} // namespace fputil

0 commit comments

Comments
 (0)