[X86][CIR]Implement handling for F16 halfs to float conversion builtins #173572
[X86][CIR] Implement handling for F16 halfs to float conversion builtins #173572 — andykaylor merged 22 commits into llvm:main from
Conversation
|
@llvm/pr-subscribers-clangir @llvm/pr-subscribers-clang Author: Priyanshu Kumar (Priyanshu3820) ChangesRelated to: #167765 Full diff: https://github.com/llvm/llvm-project/pull/173572.diff 2 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
using namespace clang;
using namespace clang::CIRGen;
@@ -362,6 +369,45 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
+// Convert F16 halfs to floats.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const StringRef str,
+ llvm::ArrayRef<mlir::Value> ops,
+ mlir::Type dstTy) {
+ assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+ "Unknown cvtph2ps intrinsic");
+
+ // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+ if (ops.size() == 4 &&
+ ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
+ 4) {
+ return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+ }
+
+ unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+ mlir::Value src = ops[0];
+
+ // Extract the subvector
+ if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+ assert(numElts == 4 && "Unexpected vector size");
+ src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+ }
+
+ // Bitcast from vXi16 to vXf16.
+ cir::VectorType halfTy = cir::VectorType::get(
+ cir::FP16Type::get(builder.getContext()), numElts);
+
+ src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+ // Perform the fp-extension
+ mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+
+ if (ops.size() >= 3)
+ res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+ return res;
+}
+
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::SmallVector<mlir::Value> ops,
bool isSigned) {
@@ -1662,9 +1708,17 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpordsd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops,
+ convertType(expr->getType()));
+ }
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps_mask
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps_mask
+ // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_mask
+ // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
+}
+
+__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps256_mask
+ // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps256_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
+}
+
+__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
+ // CIR-LABEL: test_vcvtph2ps512_mask
+ // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
+ // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps512_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ typedef short __v16hi __attribute__((__vector_size__(32)));
+ return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
+}
+
+__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+ // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+ // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+
+ return _mm_maskz_cvtph_ps(k, a);
+}
+
+__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+ // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
+
+
+ // LLVM-LABEL: @test_vcvtph2ps256_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ return _mm256_maskz_cvtph_ps(k, a);
+}
+
+__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+ // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps512_maskz
+ // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_maskz
+ // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ return _mm512_maskz_cvtph_ps(k, a);
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
|
||
| // If the SAE intrinsic doesn't use default rounding then we can't upgrade. | ||
| if (ops.size() == 4 && | ||
| ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() != |
There was a problem hiding this comment.
I believe getDefiningOp<cir::ConstantOp>() will return a null value without asserting if the defining op is not a cir::ConstantOp. That shouldn't happen, but it might make this tricky to debug if it does happen. I'd suggest rewriting this so that you can add an assert before calling getIntValue.
| // Convert F16 halfs to floats. | ||
| static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, | ||
| mlir::Location loc, | ||
| const StringRef str, |
There was a problem hiding this comment.
I don't think you need this parameter, and the way you're using it seems to be incorrect.
There was a problem hiding this comment.
Replaced with the respective intrinsic names
| if (ops.size() == 4 && | ||
| ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() != | ||
| 4) { | ||
| return emitIntrinsicCallOp(builder, loc, str, dstTy, ops); |
There was a problem hiding this comment.
I don't believe any of your test cases hit this case. If you get here, you need to emit a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 and the value you're passing in as str won't do that.
These test cases should get here (copied from clang/test/CodeGen/X86/avx512f-builtins.c)
__m512 test_mm512_cvt_roundph_ps(__m256i __A)
{
// CHECK-LABEL: test_mm512_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_cvt_roundph_ps(__A, _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A)
{
// CHECK-LABEL: test_mm512_mask_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A)
{
// CHECK-LABEL: test_mm512_maskz_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_NO_EXC);
}
There was a problem hiding this comment.
Pull request overview
This PR implements CIR (Clang Intermediate Representation) support for three X86 F16 (half-precision float) to float conversion builtins, contributing to the broader effort tracked in issue #167765 to implement missing X86 builtins in the CIR incubator.
- Adds the
emitX86CvtF16ToFloatExprhelper function to handle F16-to-float conversions with proper rounding mode support - Implements the three masked conversion builtins with support for both default rounding and explicit rounding modes via LLVM intrinsics
- Provides comprehensive test coverage including mask, maskz, and rounding variants
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.
| File | Description |
|---|---|
| clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | New comprehensive test file with 9 test functions covering all three builtins with mask, maskz, and rounding variants, comparing CIR, LLVM, and original CodeGen outputs |
| clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | Implements emitX86CvtF16ToFloatExpr function (lines 449-502) to handle F16-to-float conversion logic including subvector extraction, bitcasting, fp-extension, and masking; adds switch case entries (lines 1890-1896) to route the three builtins to this implementation |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| return builder.createMul(loc, lhs, rhs); | ||
| } | ||
|
|
||
| // Convert F16 halfs to floats. |
There was a problem hiding this comment.
The word "halfs" should be "halves" (the correct plural form of "half").
| // Convert F16 halfs to floats. | |
| // Convert F16 halves to floats. |
There was a problem hiding this comment.
Although Copilot's suggestion is correct in terms of grammar, I wouldn't make the suggested change. The use of "halfs" is awkward though. Perhaps: Convert f16 half values to floats.
| assert(constOp && "Expected constant operand"); | ||
| if (constOp.getIntValue().getZExtValue() != 4) { | ||
| StringRef intrinsicName; | ||
| switch (builtinID) { |
There was a problem hiding this comment.
Only the x86.avx512.mask.vcvtph2ps.512 form of this intrinsic takes a rounding mode parameter, so you don't need to switch on the builtin ID here. Note that in classic codegen EmitX86CvtF16ToFloatExpr always generates a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 here. We should do the same.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/26994 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/23836 Here is the relevant piece of the build log for the reference |
Related to: #167765
This PR implements-
BI__builtin_ia32_vcvtph2ps_maskBI__builtin_ia32_vcvtph2ps256_maskBI__builtin_ia32_vcvtph2ps512_mask