[X86][CIR]Implement handling for F16 halfs to float conversion builtins #173572
[X86][CIR] Implement handling for F16 halfs to float conversion builtins #173572 — andykaylor merged 22 commits into llvm:main from
Conversation
|
@llvm/pr-subscribers-clangir @llvm/pr-subscribers-clang Author: Priyanshu Kumar (Priyanshu3820) ChangesRelated to: #167765 Full diff: https://github.com/llvm/llvm-project/pull/173572.diff 2 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
#include "CIRGenBuilder.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
using namespace clang;
using namespace clang::CIRGen;
@@ -362,6 +369,45 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
+// Convert F16 halfs to floats.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ const StringRef str,
+ llvm::ArrayRef<mlir::Value> ops,
+ mlir::Type dstTy) {
+ assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+ "Unknown cvtph2ps intrinsic");
+
+ // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+ if (ops.size() == 4 &&
+ ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
+ 4) {
+ return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+ }
+
+ unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+ mlir::Value src = ops[0];
+
+ // Extract the subvector
+ if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+ assert(numElts == 4 && "Unexpected vector size");
+ src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+ }
+
+ // Bitcast from vXi16 to vXf16.
+ cir::VectorType halfTy = cir::VectorType::get(
+ cir::FP16Type::get(builder.getContext()), numElts);
+
+ src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+ // Perform the fp-extension
+ mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+
+ if (ops.size() >= 3)
+ res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+ return res;
+}
+
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::SmallVector<mlir::Value> ops,
bool isSigned) {
@@ -1662,9 +1708,17 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpordsd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops,
+ convertType(expr->getType()));
+ }
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps_mask
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps_mask
+ // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_mask
+ // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+ // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
+}
+
+__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
+ // CIR-LABEL: test_vcvtph2ps256_mask
+ // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+ // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps256_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ typedef short __v8hi __attribute__((__vector_size__(16)));
+ return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
+}
+
+__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
+ // CIR-LABEL: test_vcvtph2ps512_mask
+ // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+ // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+ // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
+ // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+
+ // LLVM-LABEL: @test_vcvtph2ps512_mask
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_mask
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+ // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ typedef short __v16hi __attribute__((__vector_size__(32)));
+ return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
+}
+
+__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+ // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+ // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+ // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+ // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+ // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // LLVM: ret <4 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+ // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+ // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+ // OGCG: ret <4 x float> {{.*}}
+
+ return _mm_maskz_cvtph_ps(k, a);
+}
+
+__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+ // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}})
+
+
+ // LLVM-LABEL: @test_vcvtph2ps256_maskz
+ // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // LLVM: ret <8 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps256_maskz
+ // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+ // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+ // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+ // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+ // OGCG: ret <8 x float> {{.*}}
+ return _mm256_maskz_cvtph_ps(k, a);
+}
+
+__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
+ // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+ // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+ // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
+ // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+ // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+
+ // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+ // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+
+ // LLVM-LABEL: @test_vcvtph2ps512_maskz
+ // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // LLVM: ret <16 x float> {{.*}}
+
+ // OGCG-LABEL: @test_vcvtph2ps512_maskz
+ // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+ // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+ // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+ // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+ // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+ // OGCG: ret <16 x float> {{.*}}
+ return _mm512_maskz_cvtph_ps(k, a);
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
|
||
| // If the SAE intrinsic doesn't use default rounding then we can't upgrade. | ||
| if (ops.size() == 4 && | ||
| ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() != |
There was a problem hiding this comment.
I believe getDefiningOp<cir::ConstantOp>() will return a null value without asserting if the defining op is not a cir::ConstantOp. That shouldn't happen, but it might make this tricky to debug if it does happen. I'd suggest rewriting this so that you can add an assert before calling getIntValue.
| // Convert F16 halfs to floats. | ||
| static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, | ||
| mlir::Location loc, | ||
| const StringRef str, |
There was a problem hiding this comment.
I don't think you need this parameter, and the way you're using it seems to be incorrect.
There was a problem hiding this comment.
Replaced with the respective intrinsic names
| if (ops.size() == 4 && | ||
| ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() != | ||
| 4) { | ||
| return emitIntrinsicCallOp(builder, loc, str, dstTy, ops); |
There was a problem hiding this comment.
I don't believe any of your test cases hit this case. If you get here, you need to emit a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 and the value you're passing in as str won't do that.
These test cases should get here (copied from clang/test/CodeGen/X86/avx512f-builtins.c)
__m512 test_mm512_cvt_roundph_ps(__m256i __A)
{
// CHECK-LABEL: test_mm512_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_cvt_roundph_ps(__A, _MM_FROUND_NO_EXC);
}
__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A)
{
// CHECK-LABEL: test_mm512_mask_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_NO_EXC);
}
__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A)
{
// CHECK-LABEL: test_mm512_maskz_cvt_roundph_ps
// CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_NO_EXC);
}
There was a problem hiding this comment.
Pull request overview
This PR implements CIR (Clang Intermediate Representation) support for three X86 F16 (half-precision float) to float conversion builtins, contributing to the broader effort tracked in issue #167765 to implement missing X86 builtins in the CIR incubator.
- Adds the
emitX86CvtF16ToFloatExprhelper function to handle F16-to-float conversions with proper rounding mode support - Implements the three masked conversion builtins with support for both default rounding and explicit rounding modes via LLVM intrinsics
- Provides comprehensive test coverage including mask, maskz, and rounding variants
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.
| File | Description |
|---|---|
| clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | New comprehensive test file with 9 test functions covering all three builtins with mask, maskz, and rounding variants, comparing CIR, LLVM, and original CodeGen outputs |
| clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | Implements emitX86CvtF16ToFloatExpr function (lines 449-502) to handle F16-to-float conversion logic including subvector extraction, bitcasting, fp-extension, and masking; adds switch case entries (lines 1890-1896) to route the three builtins to this implementation |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| return builder.createMul(loc, lhs, rhs); | ||
| } | ||
|
|
||
| // Convert F16 halfs to floats. |
There was a problem hiding this comment.
The word "halfs" should be "halves" (the correct plural form of "half").
| // Convert F16 halfs to floats. | |
| // Convert F16 halves to floats. |
There was a problem hiding this comment.
Although Copilot's suggestion is correct in terms of grammar, I wouldn't make the suggested change. The use of "halfs" is awkward though. Perhaps: Convert f16 half values to floats.
| assert(constOp && "Expected constant operand"); | ||
| if (constOp.getIntValue().getZExtValue() != 4) { | ||
| StringRef intrinsicName; | ||
| switch (builtinID) { |
There was a problem hiding this comment.
Only the x86.avx512.mask.vcvtph2ps.512 form of this intrinsic takes a rounding mode parameter, so you don't need to switch on the builtin ID here. Note that in classic codegen EmitX86CvtF16ToFloatExpr always generates a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 here. We should do the same.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/26994 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/23836 Here is the relevant piece of the build log for the reference |
Related to: #167765
This PR implements-
BI__builtin_ia32_vcvtph2ps_maskBI__builtin_ia32_vcvtph2ps256_maskBI__builtin_ia32_vcvtph2ps512_mask