Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize fabs, fneg, fcopysign #560

Merged
merged 1 commit into from
Oct 8, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 61 additions & 13 deletions lib/fizzy/execute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// SPDX-License-Identifier: Apache-2.0

#include "execute.hpp"
#include "cxx20/bit.hpp"
#include "stack.hpp"
#include "trunc_boundaries.hpp"
#include "types.hpp"
Expand All @@ -19,6 +20,11 @@ namespace
// code_offset + imm_offset + stack_height
constexpr auto BranchImmediateSize = 3 * sizeof(uint32_t);

// Mask with every bit of a 32-bit word set except the most significant one.
// AND-ing an f32 bit pattern with it drops the top (sign) bit and keeps the rest.
constexpr uint32_t F32AbsMask = 0x7fffffff;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could go crazy and use std::numeric_limits<uint32_t>::max() >> 1, but the constant is nicer.

// Bitwise complement of F32AbsMask: only the most significant bit of a 32-bit word is set.
constexpr uint32_t F32SignMask = ~F32AbsMask;
// 64-bit mask with every bit set except the most significant one.
constexpr uint64_t F64AbsMask = 0x7fffffffffffffff;
// Bitwise complement of F64AbsMask: only the most significant bit of a 64-bit word is set.
constexpr uint64_t F64SignMask = ~F64AbsMask;

template <typename T>
inline T read(const uint8_t*& input) noexcept
{
Expand Down Expand Up @@ -293,6 +299,36 @@ inline uint64_t popcnt64(uint64_t value) noexcept
return static_cast<uint64_t>(__builtin_popcountll(value));
}

template <typename T>
T fabs(T value) noexcept = delete;

template <>
inline float fabs(float value) noexcept
{
return bit_cast<float>(bit_cast<uint32_t>(value) & F32AbsMask);
}

template <>
inline double fabs(double value) noexcept
{
return bit_cast<double>(bit_cast<uint64_t>(value) & F64AbsMask);
}

template <typename T>
T fneg(T value) noexcept = delete;

template <>
inline float fneg(float value) noexcept
{
return bit_cast<float>(bit_cast<uint32_t>(value) ^ F32SignMask);
}

template <>
inline double fneg(double value) noexcept
{
return bit_cast<double>(bit_cast<uint64_t>(value) ^ F64SignMask);
}

template <typename T>
inline T fceil(T value) noexcept
{
Expand Down Expand Up @@ -392,6 +428,25 @@ inline T fmax(T a, T b) noexcept
return a < b ? b : a;
}

template <typename T>
T fcopysign(T a, T b) noexcept = delete;

template <>
inline float fcopysign(float a, float b) noexcept
{
const auto a_u = bit_cast<uint32_t>(a);
const auto b_u = bit_cast<uint32_t>(b);
return bit_cast<float>((a_u & F32AbsMask) | (b_u & F32SignMask));
}

template <>
inline double fcopysign(double a, double b) noexcept
{
const auto a_u = bit_cast<uint64_t>(a);
const auto b_u = bit_cast<uint64_t>(b);
return bit_cast<double>((a_u & F64AbsMask) | (b_u & F64SignMask));
}

__attribute__((no_sanitize("float-cast-overflow"))) inline constexpr float demote(
double value) noexcept
{
Expand Down Expand Up @@ -1223,13 +1278,12 @@ ExecutionResult execute(Instance& instance, FuncIdx func_idx, const Value* args,

case Instr::f32_abs:
{
// TODO: This can be optimized https://godbolt.org/z/aPqvfo
unary_op(stack, static_cast<float (*)(float)>(std::fabs));
unary_op(stack, fabs<float>);
break;
}
case Instr::f32_neg:
{
unary_op(stack, std::negate<float>{});
unary_op(stack, fneg<float>);
break;
}
case Instr::f32_ceil:
Expand Down Expand Up @@ -1290,24 +1344,18 @@ ExecutionResult execute(Instance& instance, FuncIdx func_idx, const Value* args,
}
case Instr::f32_copysign:
{
// TODO: This is not optimal implementation. The std::copysign() is inlined, but
// it affects the compiler to still use SSE vectors (probably due to C++ ABI)
// while this can be implemented with just generic registers and integer
// instructions: (a & ABS_MASK) | (b & SIGN_MASK).
// https://godbolt.org/z/aPqvfo
binary_op(stack, static_cast<float (*)(float, float)>(std::copysign));
binary_op(stack, fcopysign<float>);
break;
}

case Instr::f64_abs:
{
// TODO: This can be optimized https://godbolt.org/z/aPqvfo
unary_op(stack, static_cast<double (*)(double)>(std::fabs));
unary_op(stack, fabs<double>);
break;
}
case Instr::f64_neg:
{
unary_op(stack, std::negate<double>{});
unary_op(stack, fneg<double>);
break;
}
case Instr::f64_ceil:
Expand Down Expand Up @@ -1368,7 +1416,7 @@ ExecutionResult execute(Instance& instance, FuncIdx func_idx, const Value* args,
}
case Instr::f64_copysign:
{
binary_op(stack, static_cast<double (*)(double, double)>(std::copysign));
binary_op(stack, fcopysign<double>);
break;
}

Expand Down