[libc][math][c23] Add exp2m1f16 C23 math function #105690

overmighty · 2024-08-22T16:29:12Z

Part of #95250.

llvmbot · 2024-08-22T16:29:48Z

@llvm/pr-subscribers-libc

Author: OverMighty (overmighty)

Changes

Part of #95250.

Patch is 21.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105690.diff

13 Files Affected:

(modified) libc/config/linux/x86_64/entrypoints.txt (+1)
(modified) libc/docs/math/index.rst (+1-1)
(modified) libc/spec/stdc.td (+1)
(modified) libc/src/math/CMakeLists.txt (+1)
(added) libc/src/math/exp2m1f16.h (+21)
(modified) libc/src/math/generic/CMakeLists.txt (+22-4)
(modified) libc/src/math/generic/exp2f16.cpp (+2-37)
(added) libc/src/math/generic/exp2m1f16.cpp (+161)
(modified) libc/src/math/generic/expxf16.h (+38)
(modified) libc/test/src/math/CMakeLists.txt (+11)
(added) libc/test/src/math/exp2m1f16_test.cpp (+40)
(modified) libc/test/src/math/smoke/CMakeLists.txt (+11)
(added) libc/test/src/math/smoke/exp2m1f16_test.cpp (+94)

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 65c5757efe6274..06ca526c671ea8 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -596,6 +596,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.copysignf16
     libc.src.math.exp10f16
     libc.src.math.exp2f16
+    libc.src.math.exp2m1f16
     libc.src.math.expf16
     libc.src.math.expm1f16
     libc.src.math.f16add
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 185d2d440849a0..0d2bbdd3381d19 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -292,7 +292,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | exp2      | |check|          | |check|         |                        | |check|              |                        | 7.12.6.4               | F.10.3.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| exp2m1    | |check|          |                 |                        |                      |                        | 7.12.6.5               | F.10.3.5                   |
+| exp2m1    | |check|          |                 |                        | |check|              |                        | 7.12.6.5               | F.10.3.5                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | expm1     | |check|          | |check|         |                        | |check|              |                        | 7.12.6.6               | F.10.3.6                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 118dcce829be23..c75343d567d7f9 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -596,6 +596,7 @@ def StdC : StandardSpec<"stdc"> {
           GuardedFunctionSpec<"exp2f16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"exp2m1f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+          GuardedFunctionSpec<"exp2m1f16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"expm1", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"expm1f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 3cba34fc249322..bd18c68a5e2fd6 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -115,6 +115,7 @@ add_math_entrypoint_object(exp2f)
 add_math_entrypoint_object(exp2f16)
 
 add_math_entrypoint_object(exp2m1f)
+add_math_entrypoint_object(exp2m1f16)
 
 add_math_entrypoint_object(exp10)
 add_math_entrypoint_object(exp10f)
diff --git a/libc/src/math/exp2m1f16.h b/libc/src/math/exp2m1f16.h
new file mode 100644
index 00000000000000..39fcebd5a69300
--- /dev/null
+++ b/libc/src/math/exp2m1f16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for exp2m1f16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_EXP2M1F16_H
+#define LLVM_LIBC_SRC_MATH_EXP2M1F16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 exp2m1f16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_EXP2M1F16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 350072f4b9649d..c565dd5cb04f5a 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1441,13 +1441,9 @@ add_entrypoint_object(
     .expxf16
     libc.hdr.errno_macros
     libc.hdr.fenv_macros
-    libc.src.__support.CPP.array
     libc.src.__support.FPUtil.except_value_utils
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
-    libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
     libc.src.__support.FPUtil.rounding_mode
     libc.src.__support.macros.optimization
   COMPILE_OPTIONS
@@ -1475,6 +1471,28 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  exp2m1f16
+  SRCS
+    exp2m1f16.cpp
+  HDRS
+    ../exp2m1f16.h
+  DEPENDS
+    .expxf16
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.common
+    libc.src.__support.FPUtil.except_value_utils
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   exp10
   SRCS
diff --git a/libc/src/math/generic/exp2f16.cpp b/libc/src/math/generic/exp2f16.cpp
index 66b79567040053..09d61d04d4cda9 100644
--- a/libc/src/math/generic/exp2f16.cpp
+++ b/libc/src/math/generic/exp2f16.cpp
@@ -10,13 +10,9 @@
 #include "expxf16.h"
 #include "hdr/errno_macros.h"
 #include "hdr/fenv_macros.h"
-#include "src/__support/CPP/array.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/except_value_utils.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
@@ -88,39 +84,8 @@ LLVM_LIBC_FUNCTION(float16, exp2f16, (float16 x)) {
   if (auto r = EXP2F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
     return r.value();
 
-  // For -25 < x < 16, to compute 2^x, we perform the following range reduction:
-  // find hi, mid, lo, such that:
-  //   x = hi + mid + lo, in which
-  //     hi is an integer,
-  //     mid * 2^3 is an integer,
-  //     -2^(-4) <= lo < 2^(-4).
-  // In particular,
-  //   hi + mid = round(x * 2^3) * 2^(-3).
-  // Then,
-  //   2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
-  // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
-  // by adding hi to the exponent field of 2^mid.  2^lo is computed using a
-  // degree-3 minimax polynomial generated by Sollya.
-
-  float xf = x;
-  float kf = fputil::nearest_integer(xf * 0x1.0p+3f);
-  int x_hi_mid = static_cast<int>(kf);
-  int x_hi = x_hi_mid >> 3;
-  int x_mid = x_hi_mid & 0x7;
-  // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x
-  float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf);
-
-  uint32_t exp2_hi_mid_bits =
-      EXP2_MID_BITS[x_mid] +
-      static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
-  float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
-  // Degree-3 minimax polynomial generated by Sollya with the following
-  // commands:
-  //   > display = hexadecimal;
-  //   > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
-  //   > 1 + x * P;
-  float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f,
-                                   0x1.c6b4a6p-5f);
+  // exp2(x) = exp2(hi + mid) * exp2(lo)
+  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
   return static_cast<float16>(exp2_hi_mid * exp2_lo);
 }
 
diff --git a/libc/src/math/generic/exp2m1f16.cpp b/libc/src/math/generic/exp2m1f16.cpp
new file mode 100644
index 00000000000000..9d01e80ea59e2a
--- /dev/null
+++ b/libc/src/math/generic/exp2m1f16.cpp
@@ -0,0 +1,161 @@
+//===-- Half-precision 2^x - 1 function -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp2m1f16.h"
+#include "expxf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+static constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
+    // (input, RZ output, RU offset, RD offset, RN offset)
+    // x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
+    {0x0b3dU, 0x0904U, 1U, 0U, 1U},
+    // x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
+    {0x0d3fU, 0x0b45U, 1U, 0U, 1U},
+    // x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
+    {0x118cU, 0x0fb1U, 1U, 0U, 0U},
+    // x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
+    {0x21bcU, 0x1ffaU, 1U, 0U, 1U},
+    // x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
+    {0x9718U, 0x94eaU, 0U, 1U, 0U},
+    // x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
+    {0x973fU, 0x9505U, 0U, 1U, 0U},
+}};
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
+#else
+static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
+#endif
+
+static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
+    EXP2M1F16_EXCEPTS_HI = {{
+        // (input, RZ output, RU offset, RD offset, RN offset)
+        // x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
+        {0x3396U, 0x31b7U, 1U, 0U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
+        {0x34baU, 0x3345U, 1U, 0U, 0U},
+#endif
+        // x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
+        {0x36b6U, 0x3566U, 1U, 0U, 0U},
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+        // x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
+        {0x37b7U, 0x3659U, 1U, 0U, 1U},
+#endif
+        // x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
+        {0xb201U, 0xafcdU, 0U, 1U, 1U},
+        // x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
+        {0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
+        // x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
+        {0xb8a5U, 0xb54cU, 0U, 1U, 1U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA
+        // x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
+        {0xba8dU, 0xb6edU, 0U, 1U, 1U},
+#endif
+    }};
+
+LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
+    // exp2m1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When x >= 16.
+    if (x_u >= 0x4c00 && x_bits.is_pos()) {
+      // exp2m1(+inf) = +inf
+      if (x_bits.is_inf())
+        return FPBits::inf().get_val();
+
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_UPWARD:
+        fputil::set_errno_if_required(ERANGE);
+        fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
+        return FPBits::inf().get_val();
+      default:
+        return FPBits::max_normal().get_val();
+      }
+    }
+
+    // When x < -11.
+    if (x_u > 0xc980U) {
+      // exp2m1(-inf) = -1
+      if (x_bits.is_inf())
+        return FPBits::one(Sign::NEG).get_val();
+
+      // When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
+      if (x_u < 0xca00U) {
+        return fputil::round_result_slightly_down(
+            static_cast<float16>(-0x1.ffcp-1));
+      }
+
+      // When x <= -12, round(2^x - 1, HP, RN) = -1.
+      switch (fputil::quick_get_round()) {
+      case FE_TONEAREST:
+      case FE_DOWNWARD:
+        return FPBits::one(Sign::NEG).get_val();
+      default:
+        return static_cast<float16>(-0x1.ffcp-1);
+      }
+    }
+
+    // When |x| <= 2^(-3).
+    if (x_abs <= 0x3000U) {
+      if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
+          LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+
+      float xf = x;
+      // Degree-5 minimax polynomial generated by Sollya with the following
+      // commands:
+      //   > display = hexadecimal;
+      //   > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
+      //   > x * P;
+      return static_cast<float16>(
+          xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
+                                0x1.c6af88p-5f, 0x1.3b45d6p-7f,
+                                0x1.641e7cp-10f));
+    }
+  }
+
+  if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+
+  // exp2(x) = exp2(hi + mid) * exp2(lo)
+  auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
+  // exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
+  return static_cast<float16>(
+      fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h
index a0db6cee438e92..35294130a15007 100644
--- a/libc/src/math/generic/expxf16.h
+++ b/libc/src/math/generic/expxf16.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
 
 #include "src/__support/CPP/array.h"
+#include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/multiply_add.h"
 #include "src/__support/FPUtil/nearest_integer.h"
@@ -89,6 +90,43 @@ constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = {
     0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U,
 };
 
+LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
+  // For -25 < x < 16, to compute 2^x, we perform the following range reduction:
+  // find hi, mid, lo, such that:
+  //   x = hi + mid + lo, in which
+  //     hi is an integer,
+  //     mid * 2^3 is an integer,
+  //     -2^(-4) <= lo < 2^(-4).
+  // In particular,
+  //   hi + mid = round(x * 2^3) * 2^(-3).
+  // Then,
+  //   2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
+  // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
+  // by adding hi to the exponent field of 2^mid.  2^lo is computed using a
+  // degree-3 minimax polynomial generated by Sollya.
+
+  float xf = x;
+  float kf = fputil::nearest_integer(xf * 0x1.0p+3f);
+  int x_hi_mid = static_cast<int>(kf);
+  int x_hi = x_hi_mid >> 3;
+  int x_mid = x_hi_mid & 0x7;
+  // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x
+  float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf);
+
+  uint32_t exp2_hi_mid_bits =
+      EXP2_MID_BITS[x_mid] +
+      static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
+  float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
+  // Degree-3 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
+  //   > 1 + x * P;
+  float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f,
+                                   0x1.c6b4a6p-5f);
+  return {exp2_hi_mid, exp2_lo};
+}
+
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 0c4118c3694548..07a9405081f97d 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1003,6 +1003,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  exp2m1f16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    exp2m1f16_test.cpp
+  DEPENDS
+    libc.src.math.exp2m1f16
+)
+
 add_fp_unittest(
   exp10_test
   NEED_MPFR
diff --git a/libc/test/src/math/exp2m1f16_test.cpp b/libc/test/src/math/exp2m1f16_test.cpp
new file mode 100644
index 00000000000000..bf299118429d7e
--- /dev/null
+++ b/libc/test/src/math/exp2m1f16_test.cpp
@@ -0,0 +1,40 @@
+//===-- Exhaustive test for exp2m1f16 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/exp2m1f16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcExp2m1f16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf];
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0];
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcExp2m1f16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp2m1, x,
+                                   LIBC_NAMESPACE::exp2m1f16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcExp2m1f16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp2m1, x,
+                                   LIBC_NAMESPACE::exp2m1f16(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 7271e933b9311d..349716d44cfb20 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -1106,6 +1106,17 @@ add_fp_unittest(
     libc.src.math.exp2m1f
 )
 
+add_fp_unittest(
+  exp2m1f16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    exp2m1f16_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.exp2m1f16
+)
+
 add_fp_unittest(
   exp10_test
   SUITE
diff --git a/libc/test/src/math/smoke/exp2m1f16_test.cpp b/libc/test/src/math/smoke/exp2m1f16_test.cpp
new file mode 100644
index 00000000000000..7e6c2050abbbad
--- /dev/null
+++ b/libc/test/src/math/smoke/exp2m1f16_test.cpp
@@ -0,0 +1,94 @@
+//===-- Unittests for exp2m1f16 -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/exp2m1f16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcExp2m1f16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcExp2m1f16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::exp2m1f16(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::exp2m1f16(sNaN),
+                              FE_INVALID);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::exp2m1f16(inf));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(static_cast<float16>(-1.0),
+                            LIBC_NAMESPACE::exp2m1f16(neg_inf));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::exp2m1f16(zero));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::exp2m1f16(neg_zero));
+  EX...
[truncated]

jhuber6

Could you go ahead and add this to the GPU entrypoints? I'm trying to keep them mostly in sync. (Though I need to figure out why a few of them fail).

overmighty · 2024-08-22T16:37:23Z

Done. I haven't run the GPU build in a while so I haven't tested it yet, as it'll probably take some time to run the GPU build again.

jhuber6 · 2024-08-22T16:41:23Z

Done. I haven't run the GPU build in a while so I haven't tested it yet, as it'll probably take some time to run the GPU build again.

I'll test it in a bit.

jhuber6 · 2024-08-22T17:03:23Z

libc/config/gpu/entrypoints.txt

@@ -486,6 +486,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
    libc.src.math.ceilf16
    libc.src.math.copysignf16
    libc.src.math.exp10f16
+    libc.src.math.exp10m1f16


Is this the right name?

jhuber6 · 2024-08-22T17:10:01Z

FAILED: libc/test/src/math/smoke/libc.test.src.math.smoke.exp2m1f16_test.__hermetic__-cmd /home/jhuber/Documents/llvm/llvm-project/build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/src/math/smoke/libc.test.src.math.smoke.exp2m1f16_test.__hermetic__-cmd 
cd /home/jhuber/Documents/llvm/llvm-project/build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/src/math/smoke && /home/jhuber/Documents/llvm/llvm-project/build/bin/amdhsa-loader /home/jhuber/Documents/llvm/llvm-project/build/runtimes/runtimes-amdgcn-amd-amdhsa-bins/libc/test/src/math/smoke/libc.test.src.math.smoke.exp2m1f16_test.__hermetic__.__build__
[==========] Running 3 tests from 1 test suite.
[ RUN      ] LlvmLibcExp2m1f16Test.SpecialNumbers
/home/jhuber/Documents/llvm/llvm-project/libc/test/src/math/smoke/exp2m1f16_test.cpp:38: FAILURE
Failed to match (__llvm_libc_20_0_0_git::exp2m1f16(neg_zero)) against LIBC_NAMESPACE::testing::getMatcher< LIBC_NAMESPACE::testing::TestCond::EQ>((neg_zero)).
Expected floating point value: 0x8000 = (S: 1, E: 0x0000, M: 0x0000)
Actual floating point value: 0x0000 = (S: 0, E: 0x0000, M: 0x0000)

Hm, works fine on NVPTX but fails on AMDGPU. Wonder why the sign is off.

jhuber6 · 2024-08-22T17:38:27Z

I poked at it but couldn't figure out why it's giving 0.0 instead of -0.0. Seems to work on NVPTX so I don't know what's different here. Ruled out __builtin_rint and optimizations, so it might just be some weird lowering. @arsenm might have a guess? Otherwise, feel free to remove (or comment it out with a note) for the GPU case so you're not blocked on that.

arsenm · 2024-08-22T17:59:48Z

I poked at it but couldn't figure out why it's giving 0.0 instead of -0.0. Seems to work on NVPTX so I don't know what's different here. Ruled out __builtin_rint and optimizations, so it might just be some weird lowering. @arsenm might have a guess? Otherwise, feel free to remove (or comment it out with a note) for the GPU case so you're not blocked on that.

exp2 should be correctly lowered and it's just a bug if it's not. Is the bug in whatever this emitted or the llvm.exp2.f16 lowering? Can you post IR/ISA output?

overmighty · 2024-08-22T21:21:59Z

This doesn't use @llvm.exp2.f16. We have our own implementations of exponential functions using polynomial approximations. I'll try to post IR and AMDGPU assembly output tomorrow if Joseph doesn't beat me to it.

libc/spec/stdc.td

overmighty · 2024-09-26T16:59:19Z

exp2 should be correctly lowered and it's just a bug if it's not. Is the bug in whatever this emitted or the llvm.exp2.f16 lowering? Can you post IR/ISA output?

Sorry for the (long) delay, here's the IR output: https://gist.github.com/overmighty/20ec6b34f36d6b02c8009d85fa6e0794.

arsenm · 2024-09-27T05:02:01Z

exp2 should be correctly lowered and it's just a bug if it's not. Is the bug in whatever this emitted or the llvm.exp2.f16 lowering? Can you post IR/ISA output?

Sorry for the (long) delay, here's the IR output: https://gist.github.com/overmighty/20ec6b34f36d6b02c8009d85fa6e0794.

Why are there any volatile accesses? There shouldn't be any.

But still, this should just emit llvm.exp2

overmighty · 2024-10-09T23:55:09Z

Why are there any volatile accesses? There shouldn't be any.

Some of them are from https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/rounding_mode.h#L67. I'm not sure about the others.

But still, this should just emit llvm.exp2

I don't think any higher math function implementation in LLVM libc emits the respective LLVM intrinsic currently.

arsenm · 2024-10-10T10:49:30Z

Why are there any volatile accesses? There shouldn't be any.

Some of them are from https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/rounding_mode.h#L67. I'm not sure about the others.

These volatiles should be removed. The tests will work just fine without the forced stack access

jhuber6 · 2024-10-10T12:43:57Z

Why are there any volatile accesses? There shouldn't be any.

Some of them are from https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/rounding_mode.h#L67. I'm not sure about the others.

These volatiles should be removed. The tests will work just fine without the forced stack access

The usage seems to be with the rounding mode check, so it's important that nothing gets constant propagated. I actually don't know how the rounding mode is handled when...

Why are there any volatile accesses? There shouldn't be any.

Some of them are from https://github.com/llvm/llvm-project/blob/main/libc/src/__support/FPUtil/rounding_mode.h#L67. I'm not sure about the others.

These volatiles should be removed. The tests will work just fine without the forced stack access

These try to get the rounding mode through math operations so it's really important that it's not constant propagated. If you make it a constant then it will go through the LLVM optimizer which just assumes it's to-nearest, see https://godbolt.org/z/ehejrv1zK. We could probably avoid the volatile with some inline assembly or something.

arsenm · 2024-10-10T12:49:16Z

The usage seems to be with the rounding mode check, so it's important that nothing gets constant propagated. I actually don't know how the rounding mode is handled when...

They won't be if you are using strictfp (as you must be if you depend on the rounding mode). If you do know the rounding mode, they will correctly fold.

These try to get the rounding mode through math operations so it's really important that it's not constant propagated. If you make it a constant then it will go through the LLVM optimizer which just assumes it's to-nearest, see https://godbolt.org/z/ehejrv1zK. We could probably avoid the volatile with some inline assembly or something.

You need to enable fenv access

jhuber6 · 2024-10-10T14:27:55Z

You need to enable fenv access

That doesn't work on the GPU targets yet https://godbolt.org/z/6n3svs1ne. But I guess NVPTX doesn't have fenv access and I never got around to merging the AMDGPU version.

arsenm · 2024-10-10T14:39:57Z

That doesn't work on the GPU targets yet https://godbolt.org/z/6n3svs1ne. But I guess NVPTX doesn't have fenv access and I never got around to merging the AMDGPU version.

Correct, but you shouldn't design around working around unimplemented features. Full GPU strictfp support isn't coming anytime soon. It is still wrong to rely on this check without it

arsenm · 2024-10-10T14:45:06Z

That doesn't work on the GPU targets yet https://godbolt.org/z/6n3svs1ne. But I guess NVPTX doesn't have fenv access and I never got around to merging the AMDGPU version.

Correct, but you shouldn't design around working around unimplemented features. Full GPU strictfp support isn't coming anytime soon. It is still wrong to rely on this check without it

The other really broken thing is the -fexperimental-strict-floating-point flag doesn't do anything other than for the hardcoded targets in clang, making it useless as a bringup aid

lntue · 2024-10-10T15:03:06Z

That doesn't work on the GPU targets yet https://godbolt.org/z/6n3svs1ne. But I guess NVPTX doesn't have fenv access and I never got around to merging the AMDGPU version.

Correct, but you shouldn't design around working around unimplemented features. Full GPU strictfp support isn't coming anytime soon. It is still wrong to rely on this check without it

The other really broken thing is the -fexperimental-strict-floating-point flag doesn't do anything other than for the hardcoded targets in clang, making it useless as a bringup aid

We can omit the GPU targets for now. I'm going to add a LIBC_MATH_DEFAULT_ROUNDING_ONLY option to cleanly omit all the directional rounding code for targets requesting it.

Part of #95250.

overmighty · 2024-10-11T20:05:02Z

Rebased and added the last 3 commits.

overmighty requested a review from lntue August 22, 2024 16:29

llvmbot added the libc label Aug 22, 2024

overmighty mentioned this pull request Aug 22, 2024

[gsoc2024][libc][math][c23] Implement _Float16 higher math functions #95250

Open

54 tasks

jhuber6 reviewed Aug 22, 2024

View reviewed changes

lntue reviewed Aug 26, 2024

View reviewed changes

libc/spec/stdc.td Show resolved Hide resolved

overmighty added 7 commits October 11, 2024 20:34

[libc][math][c23] Add exp2m1f16 C23 math function

f8b28f9

Part of #95250.

Enable exp2m1f16 entrypoint on GPUs

52d998d

Fix entrypoint name in GPU config

430b14a

Update includes and CMake target dependencies

da6b98a

Migrate to fputil::cast and use implicit casts for constants

eca8d05

Disable exp2m1f16 on GPUs for now

5de4d1b

Add exp2m1f16 in newhdrgen math.yaml

f3ea05a

overmighty force-pushed the users/overmighty/libc-math-exp2m1f16 branch from 9c4fca7 to f3ea05a Compare October 11, 2024 20:04

lntue approved these changes Oct 11, 2024

View reviewed changes

overmighty added 2 commits October 12, 2024 13:17

Update CMake target dependencies

5058e8c

Remove braces from single-statement if in exp2m1f16

1f17209

overmighty merged commit ce65d4e into main Oct 12, 2024
8 checks passed

overmighty deleted the users/overmighty/libc-math-exp2m1f16 branch October 12, 2024 17:31

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[libc][math][c23] Add exp2m1f16 C23 math function #105690

[libc][math][c23] Add exp2m1f16 C23 math function #105690

overmighty commented Aug 22, 2024

llvmbot commented Aug 22, 2024

jhuber6 left a comment

overmighty commented Aug 22, 2024

jhuber6 commented Aug 22, 2024

jhuber6 Aug 22, 2024

overmighty Aug 22, 2024

overmighty Aug 22, 2024

jhuber6 commented Aug 22, 2024

jhuber6 commented Aug 22, 2024

arsenm commented Aug 22, 2024 •

edited

Loading

overmighty commented Aug 22, 2024

overmighty commented Sep 26, 2024

arsenm commented Sep 27, 2024

overmighty commented Oct 9, 2024

arsenm commented Oct 10, 2024

jhuber6 commented Oct 10, 2024

arsenm commented Oct 10, 2024

jhuber6 commented Oct 10, 2024

arsenm commented Oct 10, 2024 •

edited

Loading

arsenm commented Oct 10, 2024

lntue commented Oct 10, 2024

overmighty commented Oct 11, 2024

[libc][math][c23] Add exp2m1f16 C23 math function #105690

[libc][math][c23] Add exp2m1f16 C23 math function #105690

Conversation

overmighty commented Aug 22, 2024

llvmbot commented Aug 22, 2024

jhuber6 left a comment

Choose a reason for hiding this comment

overmighty commented Aug 22, 2024

jhuber6 commented Aug 22, 2024

jhuber6 Aug 22, 2024

Choose a reason for hiding this comment

overmighty Aug 22, 2024

Choose a reason for hiding this comment

overmighty Aug 22, 2024

Choose a reason for hiding this comment

jhuber6 commented Aug 22, 2024

jhuber6 commented Aug 22, 2024

arsenm commented Aug 22, 2024 • edited Loading

overmighty commented Aug 22, 2024

overmighty commented Sep 26, 2024

arsenm commented Sep 27, 2024

overmighty commented Oct 9, 2024

arsenm commented Oct 10, 2024

jhuber6 commented Oct 10, 2024

arsenm commented Oct 10, 2024

jhuber6 commented Oct 10, 2024

arsenm commented Oct 10, 2024 • edited Loading

arsenm commented Oct 10, 2024

lntue commented Oct 10, 2024

overmighty commented Oct 11, 2024

arsenm commented Aug 22, 2024 •

edited

Loading

arsenm commented Oct 10, 2024 •

edited

Loading