From 093b0e58830cb1300eec43a2a3e8e78c12e4b0ef Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 20 May 2025 16:25:04 +0200 Subject: [PATCH] Fix ml_dtypes on AVX512 CPUs with FP16 support --- .../m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb | 8 ++- ...ypes-0.5.0_fix-Eigen-Float16-support.patch | 53 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch diff --git a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb index fa34f120723f..9a0b12e3c7c0 100644 --- a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb +++ b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb @@ -39,10 +39,16 @@ exts_list = [ 'checksums': ['4eaa9d7248fd4eeb75e44d47ca29875a5ccea044cc14a17435794bf8ac116a05'], }), (name, version, { - 'patches': [('ml_dtypes-0.3.2_EigenAvx512.patch', 1)], + 'patches': [ + ('ml_dtypes-0.3.2_EigenAvx512.patch', 1), + # The source has CRLF line endings, but `patch` strips any CRs, so disable that with '--binary' + {'name': 'ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch', 'opts': '--binary'}, + ], 'checksums': [ {'ml_dtypes-0.5.0.tar.gz': '3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128'}, {'ml_dtypes-0.3.2_EigenAvx512.patch': '197b05b0b7f611749824369f026099f6a172f9e8eab6ebb6504a16573746c892'}, + {'ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch': + '36261acfe8241481edfcca76a8452eb92da5f48e5ca0ea26d746f84654f2a4de'}, ], }), ] diff --git a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch new file mode 100644 index 000000000000..1f4673263864 --- /dev/null +++ b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0_fix-Eigen-Float16-support.patch @@ -0,0 +1,53 @@ +Fix a compilation error in Eigen when compiled for hardware 
supporting Float16. +That causes: +> third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h:116:25: error: invalid 'static_cast' from type 'const Eigen::half' to type '_Float16' +> 123 | return _mm512_set1_ph(static_cast<_Float16>(from)); +> | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ +> third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h:216:39: error: invalid user-defined conversion from 'const Eigen::half' to '_Float16' [-fpermissive] +> 216 | return _mm512_add_ph(_mm512_set1_ph(a), +> | ^ + +See https://gitlab.com/libeigen/eigen/-/issues/2835 + +Author: Alexander Grund (TU Dresden) + +diff -ur a/third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +--- a/third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h 2025-05-20 14:25:44.582173849 +0200 ++++ b/third_party/eigen/Eigen/src/Core/arch/AVX512/PacketMathFP16.h 2025-05-20 14:26:29.139613400 +0200 +@@ -113,7 +113,13 @@ + + template <> + EIGEN_STRONG_INLINE Packet32h pset1(const Eigen::half& from) { +- return _mm512_set1_ph(static_cast<_Float16>(from)); ++ // half/half_raw is bit compatible ++ return _mm512_set1_ph(numext::bit_cast<_Float16>(from)); ++} ++ ++template <> ++EIGEN_STRONG_INLINE Packet32h pzero(const Packet32h& /*a*/) { ++ return _mm512_setzero_ph(); + } + + // pset1frombits +@@ -213,10 +219,8 @@ + // plset + template <> + EIGEN_STRONG_INLINE Packet32h plset(const half& a) { +- return _mm512_add_ph(_mm512_set1_ph(a), +- _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f, +- 19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, +- 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f)); ++ return _mm512_add_ph(pset1(a), _mm512_set_ph(31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, ++ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + } + + // por +@@ -507,7 +511,7 @@ + + template <> + EIGEN_STRONG_INLINE Packet32h pnegate(const 
Packet32h& a) { +- return _mm512_sub_ph(_mm512_set1_ph(0.0), a); ++ return psub(pzero(a), a); + } + + // pconj