This patch fixes two problems (#592)

shibatch · Naoki Shibata · web-flow · commit f95a6c866c24 · 2024-10-16T18:35:11.000+09:00
This patch fixes the following two problems:

* On aarch32, cbrt(4) was returned instead of M_PI/2 when +-INFINITY is given to atanf.
* Due to a bug in gcc-13, sign of tanf(+-0) was wrong on PUREC_SCALAR and VXE.

This patch also adds testing for the first problem.
The second problem is detected by the existing tester.

---------

Co-authored-by: Naoki Shibata &lt;shibatch.sf.net@gmail.com&gt;
diff --git a/src/libm-tester/tester.c b/src/libm-tester/tester.c
@@ -4825,6 +4825,8 @@ void do_test() {
     fprintf(stderr, "atanf : ");
     for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
     for(d = -10000;d < 10000 && success;d += 2.1) checkAccuracy_f(mpfr_atan, child_atanf, d, 3.5);
+    checkAccuracy_f(mpfr_atan, child_atanf, +INFINITY, 3.5);
+    checkAccuracy_f(mpfr_atan, child_atanf, -INFINITY, 3.5);
     showResult(success);
 
     //
diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c
@@ -853,6 +853,9 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
   if (LIKELY(vtestallones_i_vo32(vlt_vo_vf_vf(vabs_vf_vf(d), vcast_vf_f(TRIGRANGEMAX2f*0.5f))))) {
     q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI))));
     u = vcast_vf_vi2(q);
+#if (defined(ENABLE_PUREC_SCALAR) || defined(ENABLE_PURECFMA_SCALAR) || defined(ENABLE_VXE) || defined(ENABLE_VXENOFMA) || defined(ENABLE_VXE2) || defined(ENABLE_VXE2NOFMA)) && !defined(__clang__) && __GNUC__ == 13
+    u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), vcast_vf_f(0), u);
+#endif
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), x);
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x);
     x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x);
@@ -908,6 +911,9 @@ EXPORT CONST VECTOR_CC vfloat xtanf(vfloat d) {
 
   q = vrint_vi2_vf(vmul_vf_vf_vf(d, vcast_vf_f((float)(2 * M_1_PI))));
   u = vcast_vf_vi2(q);
+#if (defined(ENABLE_PUREC_SCALAR) || defined(ENABLE_PURECFMA_SCALAR) || defined(ENABLE_VXE) || defined(ENABLE_VXENOFMA) || defined(ENABLE_VXE2) || defined(ENABLE_VXE2NOFMA)) && !defined(__clang__) && __GNUC__ == 13
+    u = vsel_vf_vo_vf_vf(veq_vo_vi2_vi2(q, vcast_vi2_i(0)), vcast_vf_f(0), u);
+#endif
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_A2f*0.5f), d);
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_B2f*0.5f), x);
   x = vmla_vf_vf_vf_vf(u, vcast_vf_f(-PI_C2f*0.5f), x);
@@ -1770,7 +1776,7 @@ EXPORT CONST VECTOR_CC vfloat xatanf(vfloat d) {
   t = vreinterpret_vf_vm(vxor_vm_vm_vm(vand_vm_vo32_vm(veq_vo_vi2_vi2(vand_vi2_vi2_vi2(q, vcast_vi2_i(2)), vcast_vi2_i(2)), vreinterpret_vm_vf(vcast_vf_f(-0.0f))), vreinterpret_vm_vf(t)));
 
 #if defined(ENABLE_NEON32) || defined(ENABLE_NEON32VFPV4)
-  t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.5874010519681994747517056f), d), t);
+  t = vsel_vf_vo_vf_vf(visinf_vo_vf(d), vmulsign_vf_vf_vf(vcast_vf_f(1.570796326794896557998982), d), t);
 #endif
 
   return t;