-
Notifications
You must be signed in to change notification settings - Fork 38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Potential floating point optimizations #592
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -332,6 +332,19 @@ inline constexpr T popcnt(T value) noexcept | |
return static_cast<T>(popcount(value)); | ||
} | ||
|
||
template <typename T> | ||
T signbit(T value) noexcept = delete; | ||
|
||
inline bool signbit(float value) noexcept | ||
{ | ||
return (bit_cast<uint32_t>(value) & F32SignMask) != 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This generates two very simple instructions with GCC, but clang detects this pattern and replaces it with the SSE instruction. |
||
} | ||
|
||
inline bool signbit(double value) noexcept | ||
{ | ||
return (bit_cast<uint64_t>(value) & F64SignMask) != 0; | ||
} | ||
|
||
template <typename T> | ||
T fabs(T value) noexcept = delete; | ||
|
||
|
@@ -362,6 +375,25 @@ inline double fneg(double value) noexcept | |
return bit_cast<double>(bit_cast<uint64_t>(value) ^ F64SignMask); | ||
} | ||
|
||
template <typename T> | ||
T copysign(T a, T b) noexcept = delete; | ||
|
||
template <> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These should not be template specializations. Function overloading is good enough. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn't add this, just moved this function. Can remove. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still good time to improve it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Actually, I've learned this is not enough because of the way this is used. Ignore this request. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, learned that yesterday too, but was lazy to close the PR 😓 |
||
inline float copysign(float a, float b) noexcept | ||
{ | ||
const auto a_u = bit_cast<uint32_t>(a); | ||
const auto b_u = bit_cast<uint32_t>(b); | ||
return bit_cast<float>((a_u & F32AbsMask) | (b_u & F32SignMask)); | ||
} | ||
|
||
template <> | ||
inline double copysign(double a, double b) noexcept | ||
{ | ||
const auto a_u = bit_cast<uint64_t>(a); | ||
const auto b_u = bit_cast<uint64_t>(b); | ||
return bit_cast<double>((a_u & F64AbsMask) | (b_u & F64SignMask)); | ||
} | ||
|
||
template <typename T> | ||
inline T fceil(T value) noexcept | ||
{ | ||
|
@@ -389,7 +421,7 @@ inline T ffloor(T value) noexcept | |
// the __builtin_floor() outputs -0 where it should +0. | ||
// The following workarounds the issue by using the fact that the sign of | ||
// the output must always match the sign of the input value. | ||
return std::copysign(result, value); | ||
return copysign(result, value); | ||
} | ||
|
||
template <typename T> | ||
|
@@ -419,8 +451,8 @@ T fnearest(T value) noexcept | |
|
||
// This implementation is based on adjusting the result produced by trunc() by +-1 when needed. | ||
const auto t = std::trunc(value); | ||
if (const auto diff = std::abs(value - t); diff > T{0.5} || (diff == T{0.5} && !is_even(t))) | ||
return t + std::copysign(T{1}, value); | ||
if (const auto diff = fabs(value - t); diff > T{0.5} || (diff == T{0.5} && !is_even(t))) | ||
return t + copysign(T{1}, value); | ||
else | ||
return t; | ||
} | ||
|
@@ -441,7 +473,7 @@ inline T fmin(T a, T b) noexcept | |
if (std::isnan(a) || std::isnan(b)) | ||
return std::numeric_limits<T>::quiet_NaN(); // Positive canonical NaN. | ||
|
||
if (a == 0 && b == 0 && (std::signbit(a) == 1 || std::signbit(b) == 1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @chfast There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't remember. Probably it is more clear that signbit is |
||
if (a == 0 && b == 0 && (signbit(a) || signbit(b))) | ||
return -T{0}; | ||
|
||
return b < a ? b : a; | ||
|
@@ -455,30 +487,12 @@ inline T fmax(T a, T b) noexcept | |
if (std::isnan(a) || std::isnan(b)) | ||
return std::numeric_limits<T>::quiet_NaN(); // Positive canonical NaN. | ||
|
||
if (a == 0 && b == 0 && (std::signbit(a) == 0 || std::signbit(b) == 0)) | ||
if (a == 0 && b == 0 && (!signbit(a) || !signbit(b))) | ||
return T{0}; | ||
|
||
return a < b ? b : a; | ||
} | ||
|
||
template <typename T> | ||
T fcopysign(T a, T b) noexcept = delete; | ||
|
||
template <> | ||
inline float fcopysign(float a, float b) noexcept | ||
{ | ||
const auto a_u = bit_cast<uint32_t>(a); | ||
const auto b_u = bit_cast<uint32_t>(b); | ||
return bit_cast<float>((a_u & F32AbsMask) | (b_u & F32SignMask)); | ||
} | ||
|
||
template <> | ||
inline double fcopysign(double a, double b) noexcept | ||
{ | ||
const auto a_u = bit_cast<uint64_t>(a); | ||
const auto b_u = bit_cast<uint64_t>(b); | ||
return bit_cast<double>((a_u & F64AbsMask) | (b_u & F64SignMask)); | ||
} | ||
|
||
__attribute__((no_sanitize("float-cast-overflow"))) inline constexpr float demote( | ||
double value) noexcept | ||
|
@@ -1347,7 +1361,7 @@ ExecutionResult execute( | |
} | ||
case Instr::f32_copysign: | ||
{ | ||
binary_op(stack, fcopysign<float>); | ||
binary_op(stack, copysign<float>); | ||
break; | ||
} | ||
|
||
|
@@ -1419,7 +1433,7 @@ ExecutionResult execute( | |
} | ||
case Instr::f64_copysign: | ||
{ | ||
binary_op(stack, fcopysign<double>); | ||
binary_op(stack, copysign<double>); | ||
break; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We may get a proper
constexpr
signbit
in C++23: https://open-std.org/JTC1/SC22/WG21/docs/papers/2019/p0533r5.pdf