From 21e58286204a47fb72470e7a1598e1d649ddfcba Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 14:21:53 +0000
Subject: [PATCH 01/20] Implement handling for F16 halfs to floats conversion builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  73 ++++++-
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 185 ++++++++++++++++++
 2 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
+#include
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -362,6 +369,59 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+/// Emit CIR for the masked vcvtph2ps builtins, which convert packed f16
+/// values (carried in i16 vector lanes) to packed floats.
+///
+/// \p ops holds the source vector, optionally followed by the pass-through
+/// vector and the mask, and for the SAE form a trailing rounding immediate.
+/// Unless that immediate requests non-default rounding, the conversion is
+/// emitted as a bitcast to f16 lanes plus a floating-point cast to \p dstTy,
+/// with a select against the pass-through when a mask is present. Otherwise
+/// the call is emitted as the intrinsic named by \p str.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+                                            mlir::Location loc,
+                                            const StringRef str,
+                                            llvm::ArrayRef<mlir::Value> ops,
+                                            mlir::Type dstTy) {
+  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+         "Unknown cvtph2ps intrinsic");
+
+  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+  if (ops.size() == 4) {
+    // The rounding operand's defining op is dereferenced right below, so
+    // check that it really is a constant before using the handle.
+    auto roundingOp = ops[3].getDefiningOp<cir::ConstantOp>();
+    assert(roundingOp && "Expected a constant rounding operand");
+    if (roundingOp.getIntValue().getZExtValue() != 4)
+      return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+  }
+
+  unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value src = ops[0];
+
+  // When the source lane count differs from the result's (only the 4-element
+  // result is expected here), convert just its first four elements.
+  if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+    assert(numElts == 4 && "Unexpected vector size");
+    src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+  }
+
+  // Bitcast from vXi16 to vXf16.
+  cir::VectorType halfTy = cir::VectorType::get(
+      cir::FP16Type::get(builder.getContext()), numElts);
+
+  src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+  // Perform the fp-extension
+  mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+
+  // Masked forms: blend the converted lanes with the pass-through (ops[1])
+  // under the mask (ops[2]).
+  if (ops.size() >= 3)
+    res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+  return res;
+}
+
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1662,9 +1722,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    // Only the 4-operand (512-bit) builtin can take the intrinsic fallback,
+    // so pass the name of its LLVM intrinsic.
+    return emitX86CvtF16ToFloatExpr(builder, loc,
+                                    "x86.avx512.mask.vcvtph2ps.512", ops,
+                                    convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -0,0 +1,185 @@ +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s + +#include + +__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { + // CIR-LABEL: test_vcvtph2ps_mask + // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> + // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}} + + // LLVM-LABEL: @test_vcvtph2ps_mask + // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> + // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> + // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> + // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> 
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} + // LLVM: ret <4 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps_mask + // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> + // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> + // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> + // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> + // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} + // OGCG: ret <4 x float> {{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); + return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k); +} + +__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { + // CIR-LABEL: test_vcvtph2ps256_mask + // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}} + + // LLVM-LABEL: @test_vcvtph2ps256_mask + // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> + // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> + // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> + // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}} + // LLVM: ret <8 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps256_mask + // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> + // OGCG: %[[FPEXT:.*]] = fpext <8 
x half> %[[BITCAST_H]] to <8 x float> + // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> + // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}} + // OGCG: ret <8 x float> {{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); + return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k); +} + +__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { + // CIR-LABEL: test_vcvtph2ps512_mask + // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool> + // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}} + + // LLVM-LABEL: @test_vcvtph2ps512_mask + // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> + // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half> + // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float> + // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> + // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}} + // LLVM: ret <16 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps512_mask + // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> + // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half> + // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float> + // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> + // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}} + // OGCG: ret <16 x float> {{.*}} + typedef short __v16hi __attribute__((__vector_size__(32))); + return 
__builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4); +} + +__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { + // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps + // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps() + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool> + // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]] + + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz + // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}}) + + // LLVM-LABEL: @test_vcvtph2ps_maskz + // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> + // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half> + // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float> + // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> + // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}} + // LLVM: ret <4 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps_maskz + // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> 
%[[BITCAST_I]], <8 x i16> poison, <4 x i32> + // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half> + // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float> + // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> + // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}} + // OGCG: ret <4 x float> {{.*}} + + return _mm_maskz_cvtph_ps(k, a); +} + +__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { + // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps + // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps() + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz + // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) + + + // LLVM-LABEL: @test_vcvtph2ps256_maskz + // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> + // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> + // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> + // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}} + // LLVM: ret <8 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps256_maskz + // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> + // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> + // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> + // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}} + // 
OGCG: ret <8 x float> {{.*}} + return _mm256_maskz_cvtph_ps(k, a); +} + +__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { + // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps + // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps() + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz + // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}}) + + // LLVM-LABEL: @test_vcvtph2ps512_maskz + // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> + // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half> + // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float> + // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> + // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}} + // LLVM: ret <16 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps512_maskz + // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> + // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half> + // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float> + // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> + // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}} + // OGCG: ret <16 x float> {{.*}} + return _mm512_maskz_cvtph_ps(k, a); +} From b73200cc338b40a38999ccbdeb174e45c9e9fff2 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 14:30:42 +0000 Subject: [PATCH 02/20] Remove unwanted headers included by clangd --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git 
a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 07f915b51ad6d..9ecec9d615bc4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -14,20 +14,13 @@ #include "CIRGenBuilder.h" #include "CIRGenFunction.h" #include "CIRGenModule.h" -#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Location.h" -#include "mlir/IR/Types.h" -#include "mlir/IR/Value.h" #include "mlir/IR/ValueRange.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/TargetBuiltins.h" -#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" -#include using namespace clang; using namespace clang::CIRGen; From a72461410892a3561c0cede6cd564a266f507eb4 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 14:33:48 +0000 Subject: [PATCH 03/20] Fix formatting --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 9ecec9d615bc4..b39a4e683385d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -388,8 +388,8 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, } // Bitcast from vXi16 to vXf16. 
- cir::VectorType halfTy = cir::VectorType::get( - cir::FP16Type::get(builder.getContext()), numElts); + cir::VectorType halfTy = + cir::VectorType::get(cir::FP16Type::get(builder.getContext()), numElts); src = builder.createCast(cir::CastKind::bitcast, src, halfTy); From b29a415f3dbfb072c3b29e0c219f9dd1f3e3dbec Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 14:34:03 +0000 Subject: [PATCH 04/20] Fix formatting --- .../X86/avx512f16c-builtins.cir | 393 ++++++++++++++++++ 1 file changed, 393 insertions(+) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir new file mode 100644 index 0000000000000..9364d531b3585 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir @@ -0,0 +1,393 @@ +!s16i = !cir.int +!s32i = !cir.int +!s64i = !cir.int +!u16i = !cir.int +!u8i = !cir.int +#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28) +#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36) +#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39) +#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46) +#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51) +#loc8 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60) +#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31) +#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39) +#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42) +#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49) +#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54) +#loc23 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63) +#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31) +#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39) +#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42) +#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49) +#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54) +#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64) +#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21) +#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30) +#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35) +#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43) +#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29) +#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37) +#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40) +#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49) +#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24) +#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33) +#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38) +#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46) +#loc101 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32) +#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40) +#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43) +#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52) +#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24) +#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34) +#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39) +#loc123 = 
loc("./lib/clang/22/include/avx512fintrin.h":3584:47) +#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32) +#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40) +#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43) +#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53) +#loc145 = loc(fused[#loc3, #loc4]) +#loc146 = loc(fused[#loc5, #loc6]) +#loc147 = loc(fused[#loc7, #loc8]) +#loc150 = loc(fused[#loc18, #loc19]) +#loc151 = loc(fused[#loc20, #loc21]) +#loc152 = loc(fused[#loc22, #loc23]) +#loc155 = loc(fused[#loc33, #loc34]) +#loc156 = loc(fused[#loc35, #loc36]) +#loc157 = loc(fused[#loc37, #loc38]) +#loc164 = loc(fused[#loc56, #loc57]) +#loc165 = loc(fused[#loc58, #loc59]) +#loc168 = loc(fused[#loc69, #loc70]) +#loc169 = loc(fused[#loc71, #loc72]) +#loc176 = loc(fused[#loc88, #loc89]) +#loc177 = loc(fused[#loc90, #loc91]) +#loc180 = loc(fused[#loc101, #loc102]) +#loc181 = loc(fused[#loc103, #loc104]) +#loc188 = loc(fused[#loc120, #loc121]) +#loc189 = loc(fused[#loc122, #loc123]) +#loc192 = loc(fused[#loc134, #loc135]) +#loc193 = loc(fused[#loc136, #loc137]) +module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, 
"dlti.stack_alignment" = 128 : i64>} { + cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc145) + %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 16 : i64} loc(#loc146) + %2 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc147) + %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc2) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc9) + cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc9) + cir.store %arg2, %2 : !u8i, !cir.ptr loc(#loc9) + %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc10) + %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10) + %6 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc11) + %7 = cir.load align(1) %2 : !cir.ptr, !u8i loc(#loc12) + %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13) + %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13) + %10 = cir.cast bitcast %9 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13) + %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13) + %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12) + %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13) + %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x 
!cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13) + cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc148) + %15 = cir.load %3 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc148) + cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148) + } loc(#loc144) + cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc150) + %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 32 : i64} loc(#loc151) + %2 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc152) + %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc17) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc24) + cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc24) + cir.store %arg2, %2 : !u8i, !cir.ptr loc(#loc24) + %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc25) + %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25) + %6 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc26) + %7 = cir.load align(1) %2 : !cir.ptr, !u8i loc(#loc27) + %8 = cir.cast bitcast %5 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25) + %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25) + %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27) + %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28) + cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc153) + %12 = cir.load %3 : !cir.ptr>, !cir.vector<8 x 
!cir.float> loc(#loc153) + cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153) + } loc(#loc149) + cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> { + %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["a", init] {alignment = 32 : i64} loc(#loc155) + %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 64 : i64} loc(#loc156) + %2 = cir.alloca !u16i, !cir.ptr, ["k", init] {alignment = 2 : i64} loc(#loc157) + %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc32) + cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc39) + cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc39) + cir.store %arg2, %2 : !u16i, !cir.ptr loc(#loc39) + %4 = cir.load align(32) %0 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc40) + %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40) + %6 = cir.load align(64) %1 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc41) + %7 = cir.load align(2) %2 : !cir.ptr, !u16i loc(#loc42) + %8 = cir.const #cir.int<4> : !s32i loc(#loc43) + %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40) + %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc40) + %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42) + %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44) + cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc158) + %13 = cir.load %3 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc158) + cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158) + } loc(#loc154) + 
cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> { + %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc48) + %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160) + %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161) + cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc52) + %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc49) + cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc162) + %4 = cir.load %0 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc162) + cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162) + } loc(#loc159) + cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> { + %0 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc164) + %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__A", init] {alignment = 16 : i64} loc(#loc165) + %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc55) + cir.store %arg0, %0 : !u8i, !cir.ptr loc(#loc60) + cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc60) + %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc61) + %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61) + %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62) + %6 = cir.load align(1) %0 : !cir.ptr, !u8i loc(#loc63) + %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64) + %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, 
#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64) + %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64) + %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64) + %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63) + %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64) + %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64) + cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc166) + %14 = cir.load %2 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc166) + cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166) + } loc(#loc163) + cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc168) + %1 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc169) + %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc68) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc73) + cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc73) + %3 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc74) + %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc75) + %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76) + cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc170) + %6 = cir.load %2 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc170) + cir.return %6 : !cir.vector<4 x !cir.float> 
loc(#loc170) + } loc(#loc167) + cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> { + %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc80) + %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172) + %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173) + cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc84) + %3 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc81) + cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc174) + %4 = cir.load %0 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc174) + cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174) + } loc(#loc171) + cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> { + %0 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc176) + %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__A", init] {alignment = 16 : i64} loc(#loc177) + %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc87) + cir.store %arg0, %0 : !u8i, !cir.ptr loc(#loc92) + cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc92) + %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc93) + %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93) + %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94) + %6 = cir.load 
align(1) %0 : !cir.ptr, !u8i loc(#loc95) + %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93) + %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93) + %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95) + %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96) + cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc178) + %11 = cir.load %2 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc178) + cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178) + } loc(#loc175) + cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc180) + %1 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc181) + %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc100) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc105) + cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc105) + %3 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc106) + %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc107) + %5 = cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108) + cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc182) + %6 = cir.load %2 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc182) + cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182) + } loc(#loc179) + cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> { + %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} 
loc(#loc112) + %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184) + %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185) + cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc116) + %3 = cir.load align(64) %1 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc113) + cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc186) + %4 = cir.load %0 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc186) + cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186) + } loc(#loc183) + cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> { + %0 = cir.alloca !u16i, !cir.ptr, ["__U", init] {alignment = 2 : i64} loc(#loc188) + %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["__A", init] {alignment = 32 : i64} loc(#loc189) + %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc119) + cir.store %arg0, %0 : !u16i, !cir.ptr loc(#loc124) + cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc124) + %3 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc125) + %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125) + %5 = cir.call 
@_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126) + %6 = cir.load align(2) %0 : !cir.ptr, !u16i loc(#loc127) + %7 = cir.const #cir.int<4> : !s32i loc(#loc128) + %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125) + %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125) + %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127) + %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129) + cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc190) + %12 = cir.load %2 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc190) + cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190) + } loc(#loc187) + cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> { + %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["a", init] {alignment = 32 : i64} loc(#loc192) + %1 = cir.alloca !u16i, !cir.ptr, ["k", init] {alignment = 2 : i64} loc(#loc193) + %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc133) + cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc138) + cir.store %arg1, %1 : !u16i, !cir.ptr loc(#loc138) + %3 = cir.load align(2) %1 : !cir.ptr, !u16i loc(#loc139) + %4 = cir.load align(32) %0 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc140) + %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141) + cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc194) + %6 = cir.load %2 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc194) + cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194) + } loc(#loc191) +} loc(#loc) +#loc = 
loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0) +#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1) +#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1) +#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63) +#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48) +#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51) +#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56) +#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10) +#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3) +#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57) +#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1) +#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1) +#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66) +#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51) +#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54) +#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59) +#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10) +#loc29 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3) +#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60) +#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1) +#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1) +#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67) +#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52) +#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55) 
+#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60) +#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63) +#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10) +#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3) +#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64) +#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1) +#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1) +#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24) +#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57) +#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32) +#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1) +#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3) +#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1) +#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1) +#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1) +#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59) +#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14) +#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25) +#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19) +#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3) +#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28) +#loc67 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1) +#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1) +#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52) +#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29) +#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32) +#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10) +#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3) 
+#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33) +#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1) +#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1) +#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24) +#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81) +#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32) +#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53) +#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3) +#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1) +#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1) +#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1) +#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62) +#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17) +#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28) +#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19) +#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3) +#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31) +#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1) +#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1) +#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55) +#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33) +#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36) +#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11) +#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4) +#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37) +#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1) +#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1) +#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23) +#loc114 = 
loc("./lib/clang/22/include/avx512fintrin.h":261:78) +#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31) +#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56) +#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3) +#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1) +#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1) +#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1) +#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63) +#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28) +#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30) +#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18) +#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19) +#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3) +#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42) +#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1) +#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1) +#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56) +#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32) +#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35) +#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10) +#loc142 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3) +#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36) +#loc144 = loc(fused[#loc1, #loc2]) +#loc148 = loc(fused[#loc14, #loc15]) +#loc149 = loc(fused[#loc16, #loc17]) +#loc153 = loc(fused[#loc29, #loc30]) +#loc154 = loc(fused[#loc31, #loc32]) +#loc158 = loc(fused[#loc45, #loc46]) +#loc159 = loc(fused[#loc47, #loc48]) +#loc160 = loc(fused[#loc49, #loc50]) +#loc161 = loc(fused[#loc51, #loc50]) +#loc162 = loc(fused[#loc53, #loc50]) +#loc163 = loc(fused[#loc54, #loc55]) +#loc166 = 
loc(fused[#loc65, #loc66]) +#loc167 = loc(fused[#loc67, #loc68]) +#loc170 = loc(fused[#loc77, #loc78]) +#loc171 = loc(fused[#loc79, #loc80]) +#loc172 = loc(fused[#loc81, #loc82]) +#loc173 = loc(fused[#loc83, #loc82]) +#loc174 = loc(fused[#loc85, #loc82]) +#loc175 = loc(fused[#loc86, #loc87]) +#loc178 = loc(fused[#loc97, #loc98]) +#loc179 = loc(fused[#loc99, #loc100]) +#loc182 = loc(fused[#loc109, #loc110]) +#loc183 = loc(fused[#loc111, #loc112]) +#loc184 = loc(fused[#loc113, #loc114]) +#loc185 = loc(fused[#loc115, #loc114]) +#loc186 = loc(fused[#loc117, #loc114]) +#loc187 = loc(fused[#loc118, #loc119]) +#loc190 = loc(fused[#loc130, #loc131]) +#loc191 = loc(fused[#loc132, #loc133]) +#loc194 = loc(fused[#loc142, #loc143]) From 284c8750554ef09d2f919e30ef2c82d8289ef493 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 20:29:57 +0530 Subject: [PATCH 05/20] Delete clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir --- .../X86/avx512f16c-builtins.cir | 393 ------------------ 1 file changed, 393 deletions(-) delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir deleted file mode 100644 index 9364d531b3585..0000000000000 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir +++ /dev/null @@ -1,393 +0,0 @@ -!s16i = !cir.int -!s32i = !cir.int -!s64i = !cir.int -!u16i = !cir.int -!u8i = !cir.int -#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28) -#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36) -#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39) -#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46) -#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51) -#loc8 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60) -#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31) -#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39) -#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42) -#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49) -#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54) -#loc23 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63) -#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31) -#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39) -#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42) -#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49) -#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54) -#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64) -#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21) -#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30) -#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35) -#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43) -#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29) -#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37) -#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40) -#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49) -#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24) -#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33) -#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38) -#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46) -#loc101 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32) -#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40) -#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43) -#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52) -#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24) -#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34) -#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39) -#loc123 = loc("./lib/clang/22/include/avx512fintrin.h":3584:47) -#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32) -#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40) -#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43) -#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53) -#loc145 = loc(fused[#loc3, #loc4]) -#loc146 = loc(fused[#loc5, #loc6]) -#loc147 = loc(fused[#loc7, #loc8]) -#loc150 = loc(fused[#loc18, #loc19]) -#loc151 = loc(fused[#loc20, #loc21]) -#loc152 = loc(fused[#loc22, #loc23]) -#loc155 = loc(fused[#loc33, #loc34]) -#loc156 = loc(fused[#loc35, #loc36]) -#loc157 = loc(fused[#loc37, #loc38]) -#loc164 = loc(fused[#loc56, #loc57]) -#loc165 = loc(fused[#loc58, #loc59]) -#loc168 = loc(fused[#loc69, #loc70]) -#loc169 = loc(fused[#loc71, #loc72]) -#loc176 = loc(fused[#loc88, #loc89]) -#loc177 = loc(fused[#loc90, #loc91]) -#loc180 = loc(fused[#loc101, #loc102]) -#loc181 = loc(fused[#loc103, #loc104]) -#loc188 = loc(fused[#loc120, #loc121]) -#loc189 = loc(fused[#loc122, #loc123]) -#loc192 = loc(fused[#loc134, #loc135]) -#loc193 = loc(fused[#loc136, #loc137]) -module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = 
dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>} { - cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> { - %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc145) - %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 16 : i64} loc(#loc146) - %2 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc147) - %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc2) - cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc9) - cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc9) - cir.store %arg2, %2 : !u8i, !cir.ptr loc(#loc9) - %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc10) - %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10) - %6 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc11) - %7 = cir.load align(1) %2 : !cir.ptr, !u8i loc(#loc12) - %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13) - %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13) - %10 = cir.cast bitcast %9 : !cir.vector<4 x 
!s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13) - %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13) - %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12) - %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13) - %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13) - cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc148) - %15 = cir.load %3 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc148) - cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148) - } loc(#loc144) - cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> { - %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc150) - %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 32 : i64} loc(#loc151) - %2 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc152) - %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc17) - cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc24) - cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc24) - cir.store %arg2, %2 : !u8i, !cir.ptr loc(#loc24) - %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc25) - %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25) - %6 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc26) - %7 = cir.load align(1) %2 : !cir.ptr, !u8i loc(#loc27) - %8 = cir.cast bitcast %5 : 
!cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25) - %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25) - %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27) - %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28) - cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc153) - %12 = cir.load %3 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc153) - cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153) - } loc(#loc149) - cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> { - %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["a", init] {alignment = 32 : i64} loc(#loc155) - %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["src", init] {alignment = 64 : i64} loc(#loc156) - %2 = cir.alloca !u16i, !cir.ptr, ["k", init] {alignment = 2 : i64} loc(#loc157) - %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc32) - cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc39) - cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc39) - cir.store %arg2, %2 : !u16i, !cir.ptr loc(#loc39) - %4 = cir.load align(32) %0 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc40) - %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40) - %6 = cir.load align(64) %1 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc41) - %7 = cir.load align(2) %2 : !cir.ptr, !u16i loc(#loc42) - %8 = cir.const #cir.int<4> : !s32i loc(#loc43) - %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40) - %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> 
-> !cir.vector<16 x !cir.float> loc(#loc40) - %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42) - %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44) - cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc158) - %13 = cir.load %3 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc158) - cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158) - } loc(#loc154) - cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> { - %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc48) - %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160) - %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161) - cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc52) - %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc49) - cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc162) - %4 = cir.load %0 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc162) - cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162) - } loc(#loc159) - cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> { - %0 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc164) - %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__A", init] {alignment = 16 : i64} loc(#loc165) - %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc55) - cir.store %arg0, %0 : !u8i, !cir.ptr 
loc(#loc60) - cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc60) - %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc61) - %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61) - %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62) - %6 = cir.load align(1) %0 : !cir.ptr, !u8i loc(#loc63) - %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64) - %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64) - %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64) - %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64) - %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63) - %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64) - %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64) - cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc166) - %14 = cir.load %2 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc166) - cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166) - } loc(#loc163) - cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> { - %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc168) - %1 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc169) - %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc68) - cir.store %arg0, %0 : 
!cir.vector<2 x !s64i>, !cir.ptr> loc(#loc73) - cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc73) - %3 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc74) - %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc75) - %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76) - cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr> loc(#loc170) - %6 = cir.load %2 : !cir.ptr>, !cir.vector<4 x !cir.float> loc(#loc170) - cir.return %6 : !cir.vector<4 x !cir.float> loc(#loc170) - } loc(#loc167) - cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> { - %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc80) - %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172) - %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173) - cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc84) - %3 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc81) - cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc174) - %4 = cir.load %0 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc174) - cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174) - } loc(#loc171) - cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> { - %0 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc176) - %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, 
["__A", init] {alignment = 16 : i64} loc(#loc177) - %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc87) - cir.store %arg0, %0 : !u8i, !cir.ptr loc(#loc92) - cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc92) - %3 = cir.load align(16) %1 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc93) - %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93) - %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94) - %6 = cir.load align(1) %0 : !cir.ptr, !u8i loc(#loc95) - %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93) - %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93) - %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95) - %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96) - cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc178) - %11 = cir.load %2 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc178) - cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178) - } loc(#loc175) - cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> { - %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["a", init] {alignment = 16 : i64} loc(#loc180) - %1 = cir.alloca !u8i, !cir.ptr, ["k", init] {alignment = 1 : i64} loc(#loc181) - %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 32 : i64} loc(#loc100) - cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc105) - cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc105) - %3 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc106) - %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc107) - %5 = 
cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108) - cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr> loc(#loc182) - %6 = cir.load %2 : !cir.ptr>, !cir.vector<8 x !cir.float> loc(#loc182) - cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182) - } loc(#loc179) - cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> { - %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc112) - %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184) - %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185) - cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc116) - %3 = cir.load align(64) %1 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc113) - cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc186) - %4 = cir.load %0 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc186) - cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186) - } loc(#loc183) - cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> { - %0 = cir.alloca !u16i, !cir.ptr, ["__U", init] {alignment = 2 : 
i64} loc(#loc188) - %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["__A", init] {alignment = 32 : i64} loc(#loc189) - %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc119) - cir.store %arg0, %0 : !u16i, !cir.ptr loc(#loc124) - cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc124) - %3 = cir.load align(32) %1 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc125) - %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125) - %5 = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126) - %6 = cir.load align(2) %0 : !cir.ptr, !u16i loc(#loc127) - %7 = cir.const #cir.int<4> : !s32i loc(#loc128) - %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125) - %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125) - %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127) - %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129) - cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc190) - %12 = cir.load %2 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc190) - cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190) - } loc(#loc187) - cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> { - %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr>, ["a", init] {alignment = 32 : i64} loc(#loc192) - %1 = cir.alloca !u16i, !cir.ptr, ["k", init] {alignment = 2 : i64} loc(#loc193) - %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr>, ["__retval"] {alignment = 64 : i64} loc(#loc133) - cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr> loc(#loc138) - cir.store %arg1, %1 : !u16i, !cir.ptr 
loc(#loc138) - %3 = cir.load align(2) %1 : !cir.ptr, !u16i loc(#loc139) - %4 = cir.load align(32) %0 : !cir.ptr>, !cir.vector<4 x !s64i> loc(#loc140) - %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141) - cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr> loc(#loc194) - %6 = cir.load %2 : !cir.ptr>, !cir.vector<16 x !cir.float> loc(#loc194) - cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194) - } loc(#loc191) -} loc(#loc) -#loc = loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0) -#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1) -#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1) -#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63) -#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48) -#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51) -#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56) -#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10) -#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3) -#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57) -#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1) -#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1) -#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66) -#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51) -#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54) -#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59) -#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10) -#loc29 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3) -#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60) -#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1) -#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1) -#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67) -#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52) -#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55) -#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60) -#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63) -#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10) -#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3) -#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64) -#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1) -#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1) -#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24) -#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57) -#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32) -#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1) -#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3) -#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1) -#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1) -#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1) -#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59) -#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14) -#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25) -#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19) -#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3) -#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28) -#loc67 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1) -#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1) -#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52) -#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29) -#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32) -#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10) -#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3) -#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33) -#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1) -#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1) -#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24) -#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81) -#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32) -#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53) -#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3) -#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1) -#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1) -#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1) -#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62) -#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17) -#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28) -#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19) -#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3) -#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31) -#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1) -#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1) -#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55) -#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33) 
-#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36) -#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11) -#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4) -#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37) -#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1) -#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1) -#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23) -#loc114 = loc("./lib/clang/22/include/avx512fintrin.h":261:78) -#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31) -#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56) -#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3) -#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1) -#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1) -#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1) -#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63) -#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28) -#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30) -#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18) -#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19) -#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3) -#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42) -#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1) -#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1) -#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56) -#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32) -#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35) -#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10) -#loc142 = 
loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3) -#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36) -#loc144 = loc(fused[#loc1, #loc2]) -#loc148 = loc(fused[#loc14, #loc15]) -#loc149 = loc(fused[#loc16, #loc17]) -#loc153 = loc(fused[#loc29, #loc30]) -#loc154 = loc(fused[#loc31, #loc32]) -#loc158 = loc(fused[#loc45, #loc46]) -#loc159 = loc(fused[#loc47, #loc48]) -#loc160 = loc(fused[#loc49, #loc50]) -#loc161 = loc(fused[#loc51, #loc50]) -#loc162 = loc(fused[#loc53, #loc50]) -#loc163 = loc(fused[#loc54, #loc55]) -#loc166 = loc(fused[#loc65, #loc66]) -#loc167 = loc(fused[#loc67, #loc68]) -#loc170 = loc(fused[#loc77, #loc78]) -#loc171 = loc(fused[#loc79, #loc80]) -#loc172 = loc(fused[#loc81, #loc82]) -#loc173 = loc(fused[#loc83, #loc82]) -#loc174 = loc(fused[#loc85, #loc82]) -#loc175 = loc(fused[#loc86, #loc87]) -#loc178 = loc(fused[#loc97, #loc98]) -#loc179 = loc(fused[#loc99, #loc100]) -#loc182 = loc(fused[#loc109, #loc110]) -#loc183 = loc(fused[#loc111, #loc112]) -#loc184 = loc(fused[#loc113, #loc114]) -#loc185 = loc(fused[#loc115, #loc114]) -#loc186 = loc(fused[#loc117, #loc114]) -#loc187 = loc(fused[#loc118, #loc119]) -#loc190 = loc(fused[#loc130, #loc131]) -#loc191 = loc(fused[#loc132, #loc133]) -#loc194 = loc(fused[#loc142, #loc143]) From 416199137834e287ad333adfb5181e5941f71826 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 16:28:44 +0000 Subject: [PATCH 06/20] Update Test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index ee42f5de48d98..35fe714fea626 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -8,11 +8,17 @@ #include __m128 test_vcvtph2ps_mask(__m128i a, 
__m128 src, __mmask8 k) { - // CIR-LABEL: test_vcvtph2ps_mask - // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> - // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}} + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> + // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool> + // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps_mask // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -36,11 +42,15 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { } __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { - // CIR-LABEL: test_vcvtph2ps256_mask - // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> 
!cir.vector<8 x !s16i> - // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}} + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> + // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps256_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -62,12 +72,15 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { } __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { - // CIR-LABEL: test_vcvtph2ps512_mask - // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool> - // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}} + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast 
%[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> + // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool> + // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps512_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> From b91283786ee77431883b0c948a2ffa0bbb773c40 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 16:33:42 +0000 Subject: [PATCH 07/20] Update Test --- clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 35fe714fea626..a376e792005c5 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -8,7 +8,7 @@ #include __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { - // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> From dc57ff39f224e634b02abcea2db7488725ee00cb Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 16:59:17 +0000 Subject: [PATCH 08/20] 
Update test --- .../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index a376e792005c5..680b37abb5436 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,6 +9,7 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask + // CIR: cir.store {{.*}} : !u8i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> @@ -43,6 +44,7 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask + // CIR: cir.store {{.*}} : !u8i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> @@ -73,6 +75,7 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask + // CIR: cir.store {{.*}} : !u16i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> @@ -102,7 +105,7 @@ __m512 test_vcvtph2ps512_mask(__m256i 
a, __m512 src, __mmask16 k) { } __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { - // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps + // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps() @@ -114,7 +117,7 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool> // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]] - // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz + // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}}) // LLVM-LABEL: @test_vcvtph2ps_maskz @@ -139,14 +142,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { } __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { - // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps + // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps() // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz + // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) @@ -169,14 +172,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { } __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { - // CIR-LABEL: cir.func always_inline internal 
private dso_local @_mm512_maskz_cvtph_ps + // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps() // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz + // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}}) // LLVM-LABEL: @test_vcvtph2ps512_maskz From 1c6c877243dab12efaebcf56f4089ef55ee82683 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 17:45:51 +0000 Subject: [PATCH 09/20] Update Test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 97 +++++++++---------- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 680b37abb5436..0c8960f5a8431 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,50 +9,45 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask - // CIR: cir.store {{.*}} : !u8i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> - // CIR: 
%[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool> - // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] - - // LLVM-LABEL: @test_vcvtph2ps_mask - // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> - // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> - // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> - // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} - // LLVM: ret <4 x float> {{.*}} - - // OGCG-LABEL: @test_vcvtph2ps_mask - // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> - // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> - // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> - // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} - // OGCG: ret <4 x float> {{.*}} + // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> + // CIR: 
cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> + + // LLVM-LABEL: @test_vcvtph2ps_mask + // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> + // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> + // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> + // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> + // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} + // LLVM: ret <4 x float> {{.*}} + + // OGCG-LABEL: @test_vcvtph2ps_mask + // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> + // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> + // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> + // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> + // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} + // OGCG: ret <4 x float> {{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k); } __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask - // CIR: cir.store {{.*}} : !u8i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[FLOAT_EXT:.*]] = 
cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> - // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -75,15 +70,14 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask - // CIR: cir.store {{.*}} : !u16i, !cir.ptr // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool> - // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]] + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : 
!cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> @@ -106,16 +100,12 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps - // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps() // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool> - // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]] + // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: %[[FINAL_MASK:.*]] = 
cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> + // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}}) @@ -143,11 +133,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps - // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps() + // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) @@ -173,11 +166,11 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps - // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> 
-> !cir.vector<16 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps() // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}}) From ab226a73182af66ee130ef3477b583d38ddb5e54 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 19:00:41 +0000 Subject: [PATCH 10/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 84 +++++++++++-------- 1 file changed, 47 insertions(+), 37 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 0c8960f5a8431..e1ce6475b66ff 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -8,14 +8,17 @@ #include __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { - // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = 
cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> - // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> + // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i> + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> + // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps_mask // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -41,13 +44,13 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x 
!cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> + // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -71,13 +74,13 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int> - // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> + // CIR: %[[LOAD_K:.*]] = 
cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> @@ -99,13 +102,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { } __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { - // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps() - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> - // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i> + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: 
%[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> + // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}}) @@ -132,15 +139,15 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { } __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { - // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps() - // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> + // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x 
!cir.int> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) @@ -165,12 +172,15 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { } __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { - // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps + // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz + // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps() - // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int> - // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> + // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}}) From 47dfc767457b8c405b04e74cd80ab4e823d3cdfd Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Thu, 25 Dec 2025 19:38:21 +0000 Subject: [PATCH 11/20] Update test --- 
.../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index e1ce6475b66ff..6ec6e7447f72e 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -13,7 +13,8 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i> + // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i> // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> @@ -107,7 +108,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i> + // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i> // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> 
-> !cir.vector<4 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> @@ -135,7 +137,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}} // OGCG: ret <4 x float> {{.*}} - return _mm_maskz_cvtph_ps(k, a); + typedef short __v8hi __attribute__((__vector_size__(16))); + return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k); } __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { @@ -168,7 +171,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}} // OGCG: ret <8 x float> {{.*}} - return _mm256_maskz_cvtph_ps(k, a); + typedef short __v8hi __attribute__((__vector_size__(16))); + return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k); } __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { @@ -200,5 +204,6 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}} // OGCG: ret <16 x float> {{.*}} - return _mm512_maskz_cvtph_ps(k, a); + typedef short __v16hi __attribute__((__vector_size__(32))); + return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4); } From be0c47638032ced5344ea7e36399ce64283e9fca Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Fri, 26 Dec 2025 03:58:51 +0000 Subject: [PATCH 12/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 80 ++++++++----------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 6ec6e7447f72e..f5140502595d9 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -14,12 +14,12 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i> + // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> - // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps_mask // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : 
!cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps256_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps512_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> @@ -104,20 +104,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz - // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps() - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[CAST_A:.*]] = 
cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i> - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int> - // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> - - // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz - // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}}) + // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps() + // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> + // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x 
!cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]] // LLVM-LABEL: @test_vcvtph2ps_maskz // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -143,18 +140,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps() - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> - - // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz - // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) - + // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps() + // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select 
if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]] // LLVM-LABEL: @test_vcvtph2ps256_maskz // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -177,17 +170,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps() - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> - - // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz - // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}}) + // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps() + // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select 
if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]] // LLVM-LABEL: @test_vcvtph2ps512_maskz // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> From 168d3a1d2ed4076da58c363e2130789f3472661d Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Fri, 26 Dec 2025 05:06:39 +0000 Subject: [PATCH 13/20] Fix formatting --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index f5140502595d9..52e0e1c4298fe 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -17,9 +17,9 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] + // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] // 
LLVM-LABEL: @test_vcvtph2ps_mask // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] + // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps256_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]] + // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] // LLVM-LABEL: @test_vcvtph2ps512_mask // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> @@ -112,9 +112,9 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR: %[[Z_NARROW_A:.*]] = 
cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]] + // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]] // LLVM-LABEL: @test_vcvtph2ps_maskz // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -146,8 +146,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]] + // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]] 
// LLVM-LABEL: @test_vcvtph2ps256_maskz // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> @@ -176,8 +176,8 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]] + // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]] // LLVM-LABEL: @test_vcvtph2ps512_maskz // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> From f63bfe3376c7c7520b1ea4abfb26805d03f02ae8 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Fri, 26 Dec 2025 06:11:31 +0000 Subject: [PATCH 14/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 257 +++++++++--------- 1 file changed, 129 insertions(+), 128 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 52e0e1c4298fe..1672e51a0f40e 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,190 +9,191 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: 
%[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] + // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> + // CIR: cir.load {{.*}} : !cir.ptr, !u8i + // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i> + // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}} // LLVM-LABEL: @test_vcvtph2ps_mask - // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> - // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> - // LLVM: 
%[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> - // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} + // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> + // LLVM: bitcast <4 x i16> {{.*}} to <4 x half> + // LLVM: fpext <4 x half> {{.*}} to <4 x float> + // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> + // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer + // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} // LLVM: ret <4 x float> {{.*}} // OGCG-LABEL: @test_vcvtph2ps_mask - // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> - // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half> - // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float> - // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}} - // OGCG: ret <4 x float> {{.*}} + // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> + // OGCG: fpext <4 x half> {{.*}} to <4 x float> + // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> + // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer + // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k); } __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : 
!cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] + // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> + // CIR: cir.load {{.*}} : !cir.ptr, !u8i + // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_mask - // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> - // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> - // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> - // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}} - // LLVM: ret <8 x float> {{.*}} + // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: bitcast <8 x i16> {{.*}} to <8 x half> + // LLVM: fpext <8 x half> {{.*}} to <8 x float> + // LLVM: bitcast i8 {{.*}} to <8 x i1> + // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer + // LLVM: select <8 x i1> {{.*}}, <8 x float> 
{{.*}}, <8 x float> {{.*}} // OGCG-LABEL: @test_vcvtph2ps256_mask - // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> - // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> - // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> - // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}} - // OGCG: ret <8 x float> {{.*}} + // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: bitcast <8 x i16> {{.*}} to <8 x half> + // OGCG: fpext <8 x half> {{.*}} to <8 x float> + // OGCG: bitcast i8 {{.*}} to <8 x i1> + // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer + // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k); } __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask - // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> - // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]] + // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: cir.load {{.*}} : 
!cir.ptr>, !cir.vector<16 x !cir.float> + // CIR: cir.load {{.*}} : !cir.ptr, !u16i + // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_mask - // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> - // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half> - // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float> - // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> - // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}} - // LLVM: ret <16 x float> {{.*}} + // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16> + // LLVM: bitcast <16 x i16> {{.*}} to <16 x half> + // LLVM: fpext <16 x half> {{.*}} to <16 x float> + // LLVM: bitcast i16 {{.*}} to <16 x i1> + // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer + // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} // OGCG-LABEL: @test_vcvtph2ps512_mask - // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> - // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half> - // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float> - // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> - // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}} - // OGCG: ret <16 x float> {{.*}} + // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16> + // OGCG: bitcast <16 x i16> {{.*}} to <16 x half> + // OGCG: fpext <16 x half> {{.*}} to <16 x float> + // OGCG: bitcast i16 {{.*}} to <16 x i1> + // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer + // OGCG: select <16 x i1> {{.*}}, 
<16 x float> {{.*}}, <16 x float> {{.*}} typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4); } __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz - // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps() - // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i> - // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]] + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x 
!cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} // LLVM-LABEL: @test_vcvtph2ps_maskz - // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> - // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half> - // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float> - // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> + // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> + // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> + // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> + // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> + // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}} // LLVM: ret <4 x float> {{.*}} // OGCG-LABEL: @test_vcvtph2ps_maskz - // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> - // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half> - // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float> - // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> - // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}} + // OGCG: %{{.*}} = 
bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> + // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> + // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> + // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> + // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> + // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}} // OGCG: ret <4 x float> {{.*}} - typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k); } __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz - // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps() - // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]] + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : 
!cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} // LLVM-LABEL: @test_vcvtph2ps256_maskz - // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> - // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> - // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> - // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}} - // LLVM: ret <8 x float> {{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> + // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> + // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> + // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}} + // LLVM: ret <8 x float> {{.*}} // OGCG-LABEL: @test_vcvtph2ps256_maskz - // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half> - // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float> - // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1> - // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}} + // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> + // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> + // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> + // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}} // OGCG: ret <8 x float> {{.*}} - typedef short __v8hi __attribute__((__vector_size__(16))); - return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k); + typedef short __v8hi __attribute__((__vector_size__(16))); + return 
__builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k); } __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz - // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps() - // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]] + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} // LLVM-LABEL: @test_vcvtph2ps512_maskz - // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> - // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half> - // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float> - // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> - // LLVM: %[[RES:.*]] = 
select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}} + // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16> + // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> + // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> + // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1> + // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} // LLVM: ret <16 x float> {{.*}} - + // OGCG-LABEL: @test_vcvtph2ps512_maskz - // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16> - // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half> - // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float> - // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1> - // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}} + // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16> + // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> + // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> + // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1> + // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} // OGCG: ret <16 x float> {{.*}} typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4); From 964139c73bdc2f966ffb9ad5197e71b4e27f4450 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Fri, 26 Dec 2025 07:28:29 +0000 Subject: [PATCH 15/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 224 ++++++++++-------- 1 file changed, 127 insertions(+), 97 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 1672e51a0f40e..f26e0d5d5ccae 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,95 +9,113 @@ __m128 test_vcvtph2ps_mask(__m128i 
a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> - // CIR: cir.load {{.*}} : !cir.ptr, !u8i - // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i> - // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}} + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} 
: (!cir.vector<4 x !cir.{{(bool|int)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps_mask - // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> - // LLVM: bitcast <4 x i16> {{.*}} to <4 x half> - // LLVM: fpext <4 x half> {{.*}} to <4 x float> - // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> - // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer - // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} - // LLVM: ret <4 x float> {{.*}} - + // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}} + // LLVM: %{{.*}} = load i8, ptr %{{.*}} + // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> + // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> + // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> + // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // LLVM: ret <4 x float> %{{.*}} + // OGCG-LABEL: @test_vcvtph2ps_mask - // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> - // OGCG: fpext <4 x half> {{.*}} to <4 x float> - // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> - // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer - // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}} + // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}} + // OGCG: %{{.*}} = load i8, ptr %{{.*}} + // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> + // OGCG: %{{.*}} = bitcast <4 x 
i16> %{{.*}} to <4 x half> + // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> + // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k); } __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> - // CIR: cir.load {{.*}} : !cir.ptr, !u8i - // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> // 
LLVM-LABEL: @test_vcvtph2ps256_mask - // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16> - // LLVM: bitcast <8 x i16> {{.*}} to <8 x half> - // LLVM: fpext <8 x half> {{.*}} to <8 x float> - // LLVM: bitcast i8 {{.*}} to <8 x i1> - // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer - // LLVM: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} + // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> + // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> + // LLVM: %{{.*}} = load i8, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_mask - // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16> - // OGCG: bitcast <8 x i16> {{.*}} to <8 x half> - // OGCG: fpext <8 x half> {{.*}} to <8 x float> - // OGCG: bitcast i8 {{.*}} to <8 x i1> - // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer - // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}} + // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> + // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> + // OGCG: %{{.*}} = load i8, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k); } __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: cir.cast bitcast {{.*}} : 
!cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> - // CIR: cir.load {{.*}} : !cir.ptr, !u16i - // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u16i + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_mask - // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16> - // LLVM: bitcast <16 x i16> {{.*}} to <16 x half> - // LLVM: fpext <16 x half> {{.*}} to <16 x float> - // LLVM: bitcast i16 {{.*}} to <16 x i1> - // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer - // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} + // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> + // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> + // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x 
float> + // LLVM: %{{.*}} = load i16, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_mask - // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16> - // OGCG: bitcast <16 x i16> {{.*}} to <16 x half> - // OGCG: fpext <16 x half> {{.*}} to <16 x float> - // OGCG: bitcast i16 {{.*}} to <16 x i1> - // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer - // OGCG: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}} + // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> + // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> + // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> + // OGCG: %{{.*}} = load i16, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4); } @@ -109,32 +127,36 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i> - // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : 
!u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps_maskz - // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // LLVM: %{{.*}} = load i8, ptr %{{.*}} // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> - // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> - // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}} - // LLVM: ret <4 x float> {{.*}} + // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // LLVM: ret <4 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps_maskz - // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> + // OGCG: %{{.*}} = load i8, ptr %{{.*}} // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // OGCG: %{{.*}} = bitcast <4 x 
i16> %{{.*}} to <4 x half> // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> - // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> - // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}} - // OGCG: ret <4 x float> {{.*}} + // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> + // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k); } @@ -148,23 +170,27 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_maskz - // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> - // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}} - // LLVM: ret <8 x float> {{.*}} + // LLVM: %{{.*}} = load i8, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> 
%{{.*}} + // LLVM: ret <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_maskz - // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16> + // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1> - // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}} - // OGCG: ret <8 x float> {{.*}} + // OGCG: %{{.*}} = load i8, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k); } @@ -178,23 +204,27 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}} + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_maskz - // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16> + // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1> - // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} - // LLVM: ret 
<16 x float> {{.*}} + // LLVM: %{{.*}} = load i16, ptr %{{.*}} + // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // LLVM: ret <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_maskz - // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16> + // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1> - // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} - // OGCG: ret <16 x float> {{.*}} + // OGCG: %{{.*}} = load i16, ptr %{{.*}} + // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> + // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4); } From 9a999f526c9fdc21ebcb2ee7c1718b49096a48cc Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Sat, 27 Dec 2025 14:35:47 +0000 Subject: [PATCH 16/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 93 ++----------------- 1 file changed, 9 insertions(+), 84 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index f26e0d5d5ccae..8ce29b57de275 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,36 +9,25 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 
x !s16i> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i> // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps_mask - // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}} - // LLVM: %{{.*}} = load i8, ptr %{{.*}} // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - // LLVM: ret <4 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps_mask - // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // 
OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}} - // OGCG: %{{.*}} = load i8, ptr %{{.*}} // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> @@ -52,32 +41,20 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<8 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps256_mask - // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}} // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // LLVM: %{{.*}} = load i8, ptr %{{.*}} // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_mask - // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> 
- // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}} // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // OGCG: %{{.*}} = load i8, ptr %{{.*}} // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} @@ -87,32 +64,20 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<16 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u16i // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps512_mask - // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}} - // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> - // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}} // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // LLVM: %{{.*}} = load i16, ptr %{{.*}} // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_mask - // OGCG: %{{.*}} 
= load <4 x i64>, ptr %{{.*}} - // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> - // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}} // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // OGCG: %{{.*}} = load i16, ptr %{{.*}} // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} @@ -122,35 +87,20 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i - // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i> + // CIR: %{{.*}} = cir.call @_mm_setzero_ps() // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> - // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.select 
if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps_maskz - // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // LLVM: %{{.*}} = load i8, ptr %{{.*}} - // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - // LLVM: ret <4 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps_maskz - // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // OGCG: %{{.*}} = load i8, ptr %{{.*}} - // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> @@ -163,31 +113,18 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<2 x !s64i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> - // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u8i + // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if 
%{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps256_maskz - // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // LLVM: %{{.*}} = load i8, ptr %{{.*}} // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - // LLVM: ret <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_maskz - // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}} - // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // OGCG: %{{.*}} = load i8, ptr %{{.*}} // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} @@ -197,31 +134,19 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr>, !cir.vector<4 x !s64i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> - // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> - // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr, !u16i - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> - // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 
x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} // LLVM-LABEL: @test_vcvtph2ps512_maskz - // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}} - // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // LLVM: %{{.*}} = load i16, ptr %{{.*}} // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - // LLVM: ret <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_maskz - // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}} - // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16> // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // OGCG: %{{.*}} = load i16, ptr %{{.*}} // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} From 55240b56b3bca7336076d5dad83ed8156c0b245a Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Sat, 27 Dec 2025 15:02:52 +0000 Subject: [PATCH 17/20] Update test --- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 93 +++++-------------- 1 file changed, 23 insertions(+), 70 deletions(-) diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 8ce29b57de275..381e5d32af770 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -9,147 +9,100 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local 
@test_vcvtph2ps_mask - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int> + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps_mask - // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> - // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // LLVM: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps_mask - // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16> - // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> - // OGCG: %{{.*}} 
= bitcast <4 x i16> %{{.*}} to <4 x half> // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - + // OGCG: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k); } __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_mask - // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // LLVM: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_mask - // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half> // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - + // OGCG: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return 
__builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k); } __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_mask - // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // LLVM: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_mask - // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - + // OGCG: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4); } __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz - // CIR: %{{.*}} = cir.call @_mm_setzero_ps() // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : 
!u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int> + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<4 x !cir.int>, !cir.vector<4 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps_maskz - // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + // LLVM: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps_maskz - // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half> // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float> - // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> - // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} - + // OGCG: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k); } __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz - // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() - // CIR: 
%{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int>, !cir.vector<8 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps256_maskz // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + // LLVM: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps256_maskz // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float> - // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1> - // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} - + // OGCG: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} typedef short __v8hi __attribute__((__vector_size__(16))); return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k); } __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz - // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() - // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int)}}> - // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} + // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> + // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int>, !cir.vector<16 x !cir.float> // LLVM-LABEL: @test_vcvtph2ps512_maskz - // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // 
LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + // LLVM: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} // OGCG-LABEL: @test_vcvtph2ps512_maskz - // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half> // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float> - // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1> - // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - + // OGCG: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4); } From 1a3a6b85617456718bab83f51d0c5f5f827c24f9 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Tue, 6 Jan 2026 17:10:09 +0000 Subject: [PATCH 18/20] emit full intrinsic name and update test --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 32 +++++++++++---- .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 39 +++++++++++++++++++ 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index a0ad2727c78a3..ae6a39bfd0f13 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -449,17 +449,33 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, // Convert F16 halfs to floats. static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc, - const StringRef str, llvm::ArrayRef ops, - mlir::Type dstTy) { + mlir::Type dstTy, + unsigned builtinID) { assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) && "Unknown cvtph2ps intrinsic"); // If the SAE intrinsic doesn't use default rounding then we can't upgrade. 
- if (ops.size() == 4 && - ops[3].getDefiningOp().getIntValue().getZExtValue() != - 4) { - return emitIntrinsicCallOp(builder, loc, str, dstTy, ops); + if (ops.size() == 4) { + auto constOp = ops[3].getDefiningOp(); + assert(constOp && "Expected constant operand"); + if (constOp.getIntValue().getZExtValue() != 4) { + StringRef intrinsicName; + switch (builtinID) { + default: + llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_vcvtph2ps_mask: + intrinsicName = "x86.avx512.mask.vcvtph2ps.128"; + break; + case X86::BI__builtin_ia32_vcvtph2ps256_mask: + intrinsicName = "x86.avx512.mask.vcvtph2ps.256"; + break; + case X86::BI__builtin_ia32_vcvtph2ps512_mask: + intrinsicName = "x86.avx512.mask.vcvtph2ps.512"; + break; + } + return emitIntrinsicCallOp(builder, loc, intrinsicName, dstTy, ops); + } } unsigned numElts = cast(dstTy).getSize(); @@ -1875,8 +1891,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_vcvtph2ps256_mask: case X86::BI__builtin_ia32_vcvtph2ps512_mask: { mlir::Location loc = getLoc(expr->getExprLoc()); - return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops, - convertType(expr->getType())); + return emitX86CvtF16ToFloatExpr(builder, loc, ops, + convertType(expr->getType()), builtinID); } case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c index 381e5d32af770..900a1b80a0f4d 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c @@ -106,3 +106,42 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) { typedef short __v16hi __attribute__((__vector_size__(32))); return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4); } + +__m512 test_mm512_cvt_roundph_ps(__m256i a) { + // CIR-LABEL: cir.func 
no_inline dso_local @test_mm512_cvt_roundph_ps + // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float> + + // LLVM-LABEL: @test_mm512_cvt_roundph_ps + // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 -1, i32 8) + + // OGCG-LABEL: @test_mm512_cvt_roundph_ps + // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> zeroinitializer, i16 -1, i32 8) + typedef short __v16hi __attribute__((__vector_size__(32))); + return _mm512_cvt_roundph_ps((__v16hi)a, _MM_FROUND_NO_EXC); +} + +__m512 test_mm512_mask_cvt_roundph_ps(__m512 w, __mmask16 u, __m256i a) { + // CIR-LABEL: cir.func no_inline dso_local @test_mm512_mask_cvt_roundph_ps + // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float> + + // LLVM-LABEL: @test_mm512_mask_cvt_roundph_ps + // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8) + + // OGCG-LABEL: @test_mm512_mask_cvt_roundph_ps + // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8) + typedef short __v16hi __attribute__((__vector_size__(32))); + return _mm512_mask_cvt_roundph_ps(w, u, (__v16hi)a, _MM_FROUND_NO_EXC); +} + +__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 u, __m256i a) { + // CIR-LABEL: cir.func no_inline dso_local @test_mm512_maskz_cvt_roundph_ps + // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float> + + // LLVM-LABEL: 
@test_mm512_maskz_cvt_roundph_ps + // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8) + + // OGCG-LABEL: @test_mm512_maskz_cvt_roundph_ps + // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8) + typedef short __v16hi __attribute__((__vector_size__(32))); + return _mm512_maskz_cvt_roundph_ps(u, (__v16hi)a, _MM_FROUND_NO_EXC); +} From f02e93d8c143fb664f5125c0863efb2c49919835 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Tue, 6 Jan 2026 18:01:26 +0000 Subject: [PATCH 19/20] Update comments --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index ae6a39bfd0f13..12766d5035305 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -446,7 +446,7 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, return builder.createMul(loc, lhs, rhs); } -// Convert F16 halfs to floats. +// Convert f16 half values to floats. 
static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc, llvm::ArrayRef ops, From b0787e6320acdb5302e7249c9daab72fb7590f82 Mon Sep 17 00:00:00 2001 From: Priyanshu Kumar <10b.priyanshu@gmail.com> Date: Tue, 6 Jan 2026 19:00:37 +0000 Subject: [PATCH 20/20] Update CIRGenBuiltinX86.cpp --- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 12766d5035305..29a89e46bafba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -450,8 +450,7 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc, static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc, llvm::ArrayRef ops, - mlir::Type dstTy, - unsigned builtinID) { + mlir::Type dstTy) { assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) && "Unknown cvtph2ps intrinsic"); @@ -460,21 +459,8 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, auto constOp = ops[3].getDefiningOp(); assert(constOp && "Expected constant operand"); if (constOp.getIntValue().getZExtValue() != 4) { - StringRef intrinsicName; - switch (builtinID) { - default: - llvm_unreachable("Unexpected builtin"); - case X86::BI__builtin_ia32_vcvtph2ps_mask: - intrinsicName = "x86.avx512.mask.vcvtph2ps.128"; - break; - case X86::BI__builtin_ia32_vcvtph2ps256_mask: - intrinsicName = "x86.avx512.mask.vcvtph2ps.256"; - break; - case X86::BI__builtin_ia32_vcvtph2ps512_mask: - intrinsicName = "x86.avx512.mask.vcvtph2ps.512"; - break; - } - return emitIntrinsicCallOp(builder, loc, intrinsicName, dstTy, ops); + return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512", + dstTy, ops); } } @@ -1892,7 +1878,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case 
X86::BI__builtin_ia32_vcvtph2ps512_mask: { mlir::Location loc = getLoc(expr->getExprLoc()); return emitX86CvtF16ToFloatExpr(builder, loc, ops, - convertType(expr->getType()), builtinID); + convertType(expr->getType())); } case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: