Move self_greater to the is_greater callable

jfalcou · Jan 27, 2025 · d0cff16 · d0cff16
1 parent 08cc362
commit d0cff16
Show file tree

Hide file tree

Showing 15 changed files with 348 additions and 270 deletions.
diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp
@@ -33,6 +33,7 @@
 #include <eve/module/core/regular/logical_not.hpp>
 #include <eve/module/core/regular/shl.hpp>
 #include <eve/module/core/regular/shr.hpp>
+#include <eve/module/core/regular/is_greater.hpp>
 #include <eve/memory/soa_ptr.hpp>
 #include <eve/traits/product_type.hpp>
 
@@ -890,32 +891,32 @@ namespace eve
     }
 
     //! @brief Element-wise greater-than comparison between eve::wide
-    friend EVE_FORCEINLINE auto operator>(wide v, wide w) noexcept
+    friend EVE_FORCEINLINE auto operator>(wide a, wide b) noexcept
 #if !defined(EVE_DOXYGEN_INVOKED)
         requires(supports_ordering_v<Type>)
 #endif
     {
-      return detail::self_greater(v, w);
+      return is_greater(a, b);
     }
 
     //! @brief Element-wise greater-than comparison between a eve::wide and a scalar
     template<scalar_value S>
-    friend EVE_FORCEINLINE auto operator>(wide v, S w) noexcept
+    friend EVE_FORCEINLINE auto operator>(wide w, S s) noexcept
 #if !defined(EVE_DOXYGEN_INVOKED)
         requires(supports_ordering_v<Type>)
 #endif
     {
-      return v > wide {w};
+      return is_greater(w, s);
     }
 
     //! @brief Element-wise greater-than comparison between a scalar and a eve::wide
     template<scalar_value S>
-    friend EVE_FORCEINLINE auto operator>(S v, wide w) noexcept
+    friend EVE_FORCEINLINE auto operator>(S s, wide w) noexcept
 #if !defined(EVE_DOXYGEN_INVOKED)
         requires(supports_ordering_v<Type>)
 #endif
     {
-      return wide {v} > w;
+      return is_greater(s, w);
     }
 
     //! @brief Element-wise greater-or-equal comparison between eve::wide

diff --git a/include/eve/detail/function/simd/arm/neon/friends.hpp b/include/eve/detail/function/simd/arm/neon/friends.hpp
@@ -62,41 +62,6 @@ namespace eve::detail
     return !(v == w);
   }
 
-  template<typename T, typename N>
-  EVE_FORCEINLINE logical<wide<T, N>> self_greater( wide<T, N> v
-                                                      , wide<T, N> w
-                                                      ) noexcept
-      requires arm_abi<abi_t<T, N>>
-  {
-    constexpr auto cat = categorize<wide<T, N>>();
-
-          if constexpr( cat == category::int32x4  ) return vcgtq_s32(v, w);
-    else  if constexpr( cat == category::int16x8  ) return vcgtq_s16(v, w);
-    else  if constexpr( cat == category::int8x16  ) return vcgtq_s8(v, w);
-    else  if constexpr( cat == category::uint32x4 ) return vcgtq_u32(v, w);
-    else  if constexpr( cat == category::uint16x8 ) return vcgtq_u16(v, w);
-    else  if constexpr( cat == category::uint8x16 ) return vcgtq_u8(v, w);
-    else  if constexpr( cat == category::float32x4) return vcgtq_f32(v, w);
-    else  if constexpr( cat == category::int32x2  ) return vcgt_s32(v, w);
-    else  if constexpr( cat == category::int16x4  ) return vcgt_s16(v, w);
-    else  if constexpr( cat == category::int8x8   ) return vcgt_s8(v, w);
-    else  if constexpr( cat == category::uint32x2 ) return vcgt_u32(v, w);
-    else  if constexpr( cat == category::uint16x4 ) return vcgt_u16(v, w);
-    else  if constexpr( cat == category::uint8x8  ) return vcgt_u8(v, w);
-    else  if constexpr( cat == category::float32x2) return vcgt_f32(v, w);
-    else if constexpr( current_api >= asimd)
-    {
-      if constexpr( cat == category::float64x1) return vcgt_f64(v, w);
-      else  if constexpr( cat == category::int64x1)   return vcgt_s64(v, w);
-      else  if constexpr( cat == category::uint64x1)  return vcgt_u64(v, w);
-      else  if constexpr( cat == category::float64x2) return vcgtq_f64(v, w);
-      else  if constexpr( cat == category::int64x2)   return vcgtq_s64(v, w);
-      else  if constexpr( cat == category::uint64x2)  return vcgtq_u64(v, w);
-    }
-    else  if constexpr( sizeof(T) == 8 )
-      return map([]<typename E>(E const& e, E const& f){ return as_logical_t<E>(e > f); }, v, w);
-  }
-
   template<typename T, typename N>
   EVE_FORCEINLINE logical<wide<T, N>> self_geq(wide<T, N> v, wide<T, N> w) noexcept
       requires arm_abi<abi_t<T, N>>

diff --git a/include/eve/detail/function/simd/arm/sve/friends.hpp b/include/eve/detail/function/simd/arm/sve/friends.hpp
@@ -22,11 +22,6 @@ EVE_FORCEINLINE auto
 self_neq(wide<T, N> v, wide<T, N> w) noexcept -> as_logical_t<wide<T, N>>
 requires sve_abi<abi_t<T, N>> { return svcmpne(sve_true<T>(), v, w); }
 
-template<arithmetic_scalar_value T, typename N>
-EVE_FORCEINLINE auto
-self_greater(wide<T, N> v, wide<T, N> w) noexcept -> as_logical_t<wide<T, N>>
-requires sve_abi<abi_t<T, N>> { return svcmpgt(sve_true<T>(), v, w); }
-
 template<arithmetic_scalar_value T, typename N>
 EVE_FORCEINLINE auto
 self_leq(wide<T, N> v, wide<T, N> w) noexcept -> as_logical_t<wide<T, N>>

diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp
@@ -113,20 +113,6 @@ namespace eve::detail
     }
   }
 
-  template<simd_value Wide>
-  EVE_FORCEINLINE auto self_greater(Wide const& v,Wide const& w) noexcept
-  {
-    if constexpr( product_type<Wide> )
-    {
-      return convert(kumi::to_tuple(v) > kumi::to_tuple(w), as_element<as_logical_t<Wide>>());
-    }
-    else
-    {
-      constexpr auto gt = []<typename E>(E const& e, E const& f) { return as_logical_t<E>(e > f); };
-      return apply_over(gt, v, w);
-    }
-  }
-
   template<simd_value Wide>
   EVE_FORCEINLINE auto self_geq(Wide const& v,Wide const& w) noexcept
   {

diff --git a/include/eve/detail/function/simd/ppc/friends.hpp b/include/eve/detail/function/simd/ppc/friends.hpp
@@ -28,13 +28,6 @@ namespace eve::detail
     return logical<wide<T,N>>(vec_cmpne(v.storage(), w.storage()));
   }
 
-  template<arithmetic_scalar_value T, typename N>
-  EVE_FORCEINLINE auto self_greater(wide<T, N> const &v, wide<T, N> const &w) noexcept
-    requires ppc_abi<abi_t<T, N>>
-  {
-    return logical<wide<T,N>>(vec_cmpgt(v.storage(), w.storage()));
-  }
-
   template<arithmetic_scalar_value T, typename N>
   EVE_FORCEINLINE auto self_geq(wide<T, N> const &v, wide<T, N> const &w) noexcept
     requires ppc_abi<abi_t<T, N>>

diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp
@@ -12,37 +12,6 @@ namespace eve::detail
 
 // *_impl in separate functions, as otherwise compiler can not
 // choose overload between riscv-specific and common one
-template<plain_scalar_value T, typename N, value U>
-EVE_FORCEINLINE auto
-self_greater_impl(wide<T, N> lhs, U rhs) noexcept -> logical<wide<T, N>>
-requires rvv_abi<abi_t<T, N>> && (std::same_as<wide<T, N>, U> || scalar_value<U>)
-{
-  if constexpr( scalar_value<U> && !std::same_as<T, U> )
-    return self_greater(lhs, static_cast<T>(rhs));
-  else
-  {
-    constexpr auto c = categorize<wide<T, N>>();
-    if constexpr( match(c, category::int_) ) return __riscv_vmsgt(lhs, rhs, N::value);
-    else if constexpr( match(c, category::uint_) ) return __riscv_vmsgtu(lhs, rhs, N::value);
-    else if constexpr( match(c, category::float_) ) return __riscv_vmfgt(lhs, rhs, N::value);
-  }
-}
-
-template<plain_scalar_value T, typename N>
-EVE_FORCEINLINE auto
-self_greater(wide<T, N> lhs, wide<T, N> rhs) noexcept -> logical<wide<T, N>>
-requires rvv_abi<abi_t<T, N>>
-{
-  return self_greater_impl(lhs, rhs);
-}
-
-template<plain_scalar_value T, typename N>
-EVE_FORCEINLINE auto
-self_greater(wide<T, N> lhs, std::convertible_to<T> auto rhs) noexcept -> logical<wide<T, N>>
-requires rvv_abi<abi_t<T, N>>
-{
-  return self_greater_impl(lhs, rhs);
-}
 
 template<plain_scalar_value T, typename N, value U>
 EVE_FORCEINLINE auto

diff --git a/include/eve/detail/function/simd/x86/friends.hpp b/include/eve/detail/function/simd/x86/friends.hpp
@@ -179,89 +179,6 @@ self_neq(logical<wide<T, N>> v, logical<wide<T, N>> w) noexcept requires x86_abi
   else { return bit_cast(v.bits() ^ w.bits(), as(v)); }
 }
 
-//================================================================================================
-template<arithmetic_scalar_value T, typename N>
-EVE_FORCEINLINE as_logical_t<wide<T, N>>
-                self_greater(wide<T, N> v, wide<T, N> w) noexcept requires x86_abi<abi_t<T, N>>
-{
-  constexpr auto c = categorize<wide<T, N>>();
-  constexpr auto f = to_integer(cmp_flt::gt_oq);
-
-  if constexpr( current_api >= avx512 )
-  {
-    if constexpr( c == category::float32x16 ) return mask16 {_mm512_cmp_ps_mask(v, w, f)};
-    else if constexpr( c == category::float32x8 ) return mask8 {_mm256_cmp_ps_mask(v, w, f)};
-    else if constexpr( c == category::float32x4 ) return mask8 {_mm_cmp_ps_mask(v, w, f)};
-    else if constexpr( c == category::float64x8 ) return mask8 {_mm512_cmp_pd_mask(v, w, f)};
-    else if constexpr( c == category::float64x4 ) return mask8 {_mm256_cmp_pd_mask(v, w, f)};
-    else if constexpr( c == category::float64x2 ) return mask8 {_mm_cmp_pd_mask(v, w, f)};
-    else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_cmpgt_epu64_mask(v, w)};
-    else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_cmpgt_epu64_mask(v, w)};
-    else if constexpr( c == category::uint64x2 ) return mask8 {_mm_cmpgt_epu64_mask(v, w)};
-    else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_cmpgt_epu32_mask(v, w)};
-    else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_cmpgt_epu32_mask(v, w)};
-    else if constexpr( c == category::uint32x4 ) return mask8 {_mm_cmpgt_epu32_mask(v, w)};
-    else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_cmpgt_epu16_mask(v, w)};
-    else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_cmpgt_epu16_mask(v, w)};
-    else if constexpr( c == category::uint16x8 ) return mask8 {_mm_cmpgt_epu16_mask(v, w)};
-    else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_cmpgt_epu8_mask(v, w)};
-    else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_cmpgt_epu8_mask(v, w)};
-    else if constexpr( c == category::uint8x16 ) return mask16 {_mm_cmpgt_epu8_mask(v, w)};
-    else if constexpr( c == category::int64x8 ) return mask8 {_mm512_cmpgt_epi64_mask(v, w)};
-    else if constexpr( c == category::int64x4 ) return mask8 {_mm256_cmpgt_epi64_mask(v, w)};
-    else if constexpr( c == category::int64x2 ) return mask8 {_mm_cmpgt_epi64_mask(v, w)};
-    else if constexpr( c == category::int32x16 ) return mask16 {_mm512_cmpgt_epi32_mask(v, w)};
-    else if constexpr( c == category::int32x8 ) return mask8 {_mm256_cmpgt_epi32_mask(v, w)};
-    else if constexpr( c == category::int32x4 ) return mask8 {_mm_cmpgt_epi32_mask(v, w)};
-    else if constexpr( c == category::int16x32 ) return mask32 {_mm512_cmpgt_epi16_mask(v, w)};
-    else if constexpr( c == category::int16x16 ) return mask16 {_mm256_cmpgt_epi16_mask(v, w)};
-    else if constexpr( c == category::int16x8 ) return mask8 {_mm_cmpgt_epi16_mask(v, w)};
-    else if constexpr( c == category::int8x64 ) return mask64 {_mm512_cmpgt_epi8_mask(v, w)};
-    else if constexpr( c == category::int8x32 ) return mask32 {_mm256_cmpgt_epi8_mask(v, w)};
-    else if constexpr( c == category::int8x16 ) return mask16 {_mm_cmpgt_epi8_mask(v, w)};
-  }
-  else
-  {
-    if constexpr( c == category::float32x8 ) return _mm256_cmp_ps(v, w, f);
-    else if constexpr( c == category::float64x4 ) return _mm256_cmp_pd(v, w, f);
-    else if constexpr( c == category::float32x4 ) return _mm_cmpgt_ps(v, w);
-    else if constexpr( c == category::float64x2 ) return _mm_cmpgt_pd(v, w);
-    else
-    {
-      constexpr auto use_avx2 = current_api >= avx2;
-      constexpr auto use_sse4 = current_api >= sse4_2;
-
-      constexpr auto gt = []<typename E>(E ev, E fv) { return as_logical_t<E>(ev > fv); };
-
-      [[maybe_unused]] auto unsigned_cmp = [](auto vv, auto vw)
-      {
-        using l_t     = logical<wide<T, N>>;
-        auto const sm = signmask(as<as_integer_t<wide<T, N>, signed>>());
-        return bit_cast((bit_cast(vv, as(sm)) - sm) > (bit_cast(vw, as(sm)) - sm), as<l_t> {});
-      };
-
-      if constexpr( use_avx2 && c == category::int64x4 ) return _mm256_cmpgt_epi64(v, w);
-      else if constexpr( use_avx2 && c == category::uint64x4 ) return unsigned_cmp(v, w);
-      else if constexpr( use_avx2 && c == category::int32x8 ) return _mm256_cmpgt_epi32(v, w);
-      else if constexpr( use_avx2 && c == category::uint32x8 ) return unsigned_cmp(v, w);
-      else if constexpr( use_avx2 && c == category::int16x16 ) return _mm256_cmpgt_epi16(v, w);
-      else if constexpr( use_avx2 && c == category::uint16x16 ) return unsigned_cmp(v, w);
-      else if constexpr( use_avx2 && c == category::int8x32 ) return _mm256_cmpgt_epi8(v, w);
-      else if constexpr( use_avx2 && c == category::uint8x32 ) return unsigned_cmp(v, w);
-      else if constexpr( use_sse4 && c == category::int64x2 ) return _mm_cmpgt_epi64(v, w);
-      else if constexpr( c == category::int64x2 ) return map(gt, v, w);
-      else if constexpr( c == category::int32x4 ) return _mm_cmpgt_epi32(v, w);
-      else if constexpr( c == category::int16x8 ) return _mm_cmpgt_epi16(v, w);
-      else if constexpr( c == category::int8x16 ) return _mm_cmpgt_epi8(v, w);
-      else if constexpr( c == category::uint64x2 ) return unsigned_cmp(v, w);
-      else if constexpr( c == category::uint32x4 ) return unsigned_cmp(v, w);
-      else if constexpr( c == category::uint16x8 ) return unsigned_cmp(v, w);
-      else if constexpr( c == category::uint8x16 ) return unsigned_cmp(v, w);
-      else return aggregate(gt, v, w);
-    }
-  }
-}
-
 //================================================================================================
 template<arithmetic_scalar_value T, typename N>
 EVE_FORCEINLINE as_logical_t<wide<T, N>>

diff --git a/include/eve/module/core/regular/impl/is_greater.hpp b/include/eve/module/core/regular/impl/is_greater.hpp
@@ -0,0 +1,39 @@
+//==================================================================================================
+/*
+  EVE - Expressive Vector Engine
+  Copyright : EVE Project Contributors
+  SPDX-License-Identifier: BSL-1.0
+*/
+//==================================================================================================
+#pragma once
+
+#include <eve/module/core/regular/abs.hpp>
+#include <eve/module/core/regular/if_else.hpp>
+#include <eve/module/core/regular/fam.hpp>
+#include <eve/module/core/regular/next.hpp>
+#include <eve/module/core/regular/max.hpp>
+#include <eve/traits/as_logical.hpp>
+#include <eve/module/core/detail/tolerance.hpp>
+
+namespace eve::detail
+{
+  template<callable_options O, value T>
+  EVE_FORCEINLINE constexpr as_logical_t<T> is_greater_(EVE_REQUIRES(cpu_), O const & o, T a, T b) noexcept
+  {
+    if constexpr(O::contains(definitely))
+    {
+      auto tol = o[definitely].value(T{});
+      if constexpr(integral_value<decltype(tol)>) return a > eve::next(b, tol);
+      else                                        return a > fam(b, tol, eve::max(eve::abs(a), eve::abs(b)));
+    }
+    else if constexpr (product_type<T>)
+    {
+      return kumi::to_tuple(a) > kumi::to_tuple(b);
+    }
+    else
+    {
+      if constexpr (scalar_value<T>) return as_logical_t<T>(a > b);
+      else                           return map([]<typename E>(E e, E f){ return as_logical_t<E>(e > f); }, a, b);
+    }
+  }
+}
diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_greater.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_greater.hpp
@@ -0,0 +1,55 @@
+//==================================================================================================
+/*
+  EVE - Expressive Vector Engine
+  Copyright : EVE Project Contributors
+  SPDX-License-Identifier: BSL-1.0
+*/
+//==================================================================================================
+#pragma once
+
+#include <eve/concept/value.hpp>
+#include <eve/detail/abi.hpp>
+#include <eve/detail/category.hpp>
+#include <eve/forward.hpp>
+
+namespace eve::detail
+{
+  template<callable_options O, arithmetic_scalar_value T, typename N>
+  EVE_FORCEINLINE logical<wide<T, N>> is_greater_(EVE_REQUIRES(neon128_), O const& opts, wide<T, N> a, wide<T, N> b) noexcept
+    requires arm_abi<abi_t<T, N>>
+  {
+    if constexpr (O::contains(definitely))
+    {
+      return is_greater.behavior(cpu_{}, opts, a, b);
+    }
+    else
+    {
+      constexpr auto cat = categorize<wide<T, N>>();
+
+      if      constexpr (cat == category::int32x4)   return vcgtq_s32(a, b);
+      else if constexpr (cat == category::int16x8)   return vcgtq_s16(a, b);
+      else if constexpr (cat == category::int8x16)   return vcgtq_s8(a, b);
+      else if constexpr (cat == category::uint32x4)  return vcgtq_u32(a, b);
+      else if constexpr (cat == category::uint16x8)  return vcgtq_u16(a, b);
+      else if constexpr (cat == category::uint8x16)  return vcgtq_u8(a, b);
+      else if constexpr (cat == category::float32x4) return vcgtq_f32(a, b);
+      else if constexpr (cat == category::int32x2)   return vcgt_s32(a, b);
+      else if constexpr (cat == category::int16x4)   return vcgt_s16(a, b);
+      else if constexpr (cat == category::int8x8)    return vcgt_s8(a, b);
+      else if constexpr (cat == category::uint32x2)  return vcgt_u32(a, b);
+      else if constexpr (cat == category::uint16x4)  return vcgt_u16(a, b);
+      else if constexpr (cat == category::uint8x8)   return vcgt_u8(a, b);
+      else if constexpr (cat == category::float32x2) return vcgt_f32(a, b);
+      else if constexpr (current_api >= asimd)
+      {
+        if      constexpr (cat == category::float64x1) return vcgt_f64(a, b);
+        else if constexpr (cat == category::int64x1)   return vcgt_s64(a, b);
+        else if constexpr (cat == category::uint64x1)  return vcgt_u64(a, b);
+        else if constexpr (cat == category::float64x2) return vcgtq_f64(a, b);
+        else if constexpr (cat == category::int64x2)   return vcgtq_s64(a, b);
+        else if constexpr (cat == category::uint64x2)  return vcgtq_u64(a, b);
+      }
+      else return map([]<typename E>(E e, E f){ return as_logical_t<E>(e > f); }, a, b);
+    }
+  }
+}
diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_greater.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_greater.hpp
@@ -0,0 +1,22 @@
+//==================================================================================================
+/*
+  EVE - Expressive Vector Engine
+  Copyright : EVE Project Contributors
+  SPDX-License-Identifier: BSL-1.0
+*/
+//==================================================================================================
+#pragma once
+
+#include <eve/concept/value.hpp>
+#include <eve/detail/implementation.hpp>
+
+namespace eve::detail
+{
+  template<callable_options O, typename T, typename N>
+  EVE_FORCEINLINE logical<wide<T, N>> is_greater_(EVE_REQUIRES(sve_), O const& opts, wide<T, N> a, wide<T, N> b) noexcept
+    requires sve_abi<abi_t<T, N>>
+  {
+    if constexpr (O::contains(definitely)) return is_greater.behavior(cpu_{}, opts, a, b);
+    else                                   return svcmpgt(sve_true<T>(), a, b);
+  }
+}