Skip to content

[X86][CIR]Implement handling for F16 halfs to float conversion builtins #173572

Merged
andykaylor merged 22 commits intollvm:mainfrom
Priyanshu3820:convert-fp16
Jan 7, 2026
Merged

[X86][CIR]Implement handling for F16 halfs to float conversion builtins #173572
andykaylor merged 22 commits intollvm:mainfrom
Priyanshu3820:convert-fp16

Conversation

@Priyanshu3820
Copy link
Contributor

@Priyanshu3820 Priyanshu3820 commented Dec 25, 2025

Related to: #167765
This PR implements-
BI__builtin_ia32_vcvtph2ps_mask
BI__builtin_ia32_vcvtph2ps256_mask
BI__builtin_ia32_vcvtph2ps512_mask

@llvmbot llvmbot added clang Clang issues not falling into any other category ClangIR Anything related to the ClangIR project labels Dec 25, 2025
@llvmbot
Copy link
Member

llvmbot commented Dec 25, 2025

@llvm/pr-subscribers-clangir

@llvm/pr-subscribers-clang

Author: Priyanshu Kumar (Priyanshu3820)

Changes

Related to: #167765


Full diff: https://github.com/llvm/llvm-project/pull/173572.diff

2 Files Affected:

  • (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+55-1)
  • (added) clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c (+185)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cassert>
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -362,6 +369,45 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+// Convert F16 halfs to floats.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+                                            mlir::Location loc,
+                                            const StringRef str,
+                                            llvm::ArrayRef<mlir::Value> ops,
+                                            mlir::Type dstTy) {
+  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+         "Unknown cvtph2ps intrinsic");
+
+  // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
+  if (ops.size() == 4 &&
+      ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
+          4) {
+    return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+  }
+
+  unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value src = ops[0];
+
+  // Extract the subvector
+  if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+    assert(numElts == 4 && "Unexpected vector size");
+    src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+  }
+
+  // Bitcast from vXi16 to vXf16.
+  cir::VectorType halfTy = cir::VectorType::get(
+      cir::FP16Type::get(builder.getContext()), numElts);
+
+  src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+  // Perform the fp-extension
+  mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+
+  if (ops.size() >= 3)
+    res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+  return res;
+}
+
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1662,9 +1708,17 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops,
+                                    convertType(expr->getType()));
+  }
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
+    // CIR-LABEL: test_vcvtph2ps_mask
+    // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+    // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+    // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+    // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+    // LLVM-LABEL: @test_vcvtph2ps_mask
+    // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+    // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+    // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+    // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+    // LLVM: ret <4 x float> {{.*}}
+
+    // OGCG-LABEL: @test_vcvtph2ps_mask
+    // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+    // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+    // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+    // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+    // OGCG: ret <4 x float> {{.*}}
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
+}
+
+__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
+  // CIR-LABEL: test_vcvtph2ps256_mask
+  // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+  // LLVM-LABEL: @test_vcvtph2ps256_mask
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+  // LLVM: ret <8 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps256_mask
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+  // OGCG: ret <8 x float> {{.*}}
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
+}
+
+__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
+  // CIR-LABEL: test_vcvtph2ps512_mask
+  // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
+  // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+
+  // LLVM-LABEL: @test_vcvtph2ps512_mask
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+  // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+  // LLVM: ret <16 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps512_mask
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+  // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+  // OGCG: ret <16 x float> {{.*}}
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
+}
+
+__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+  // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+  // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+
+  // LLVM-LABEL: @test_vcvtph2ps_maskz
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+  // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+  // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // LLVM: ret <4 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps_maskz
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+  // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+  // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // OGCG: ret <4 x float> {{.*}}
+
+  return _mm_maskz_cvtph_ps(k, a);
+}
+
+__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+  // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
+
+
+  // LLVM-LABEL: @test_vcvtph2ps256_maskz
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+  // LLVM: ret <8 x float> {{.*}} 
+
+  // OGCG-LABEL: @test_vcvtph2ps256_maskz
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+  // OGCG: ret <8 x float> {{.*}}
+   return _mm256_maskz_cvtph_ps(k, a);
+}
+
+__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+  // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+
+  // LLVM-LABEL: @test_vcvtph2ps512_maskz
+  // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+  // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // LLVM: ret <16 x float> {{.*}}
+  
+  // OGCG-LABEL: @test_vcvtph2ps512_maskz
+  // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+  // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // OGCG: ret <16 x float> {{.*}}
+  return _mm512_maskz_cvtph_ps(k, a);
+}

@github-actions
Copy link

github-actions bot commented Dec 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@github-actions
Copy link

github-actions bot commented Dec 25, 2025

🐧 Linux x64 Test Results

  • 113440 tests passed
  • 4159 tests skipped

✅ The build succeeded and all tests passed.


// If the SAE intrinsic doesn't use default rounding then we can't upgrade.
if (ops.size() == 4 &&
ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe getDefiningOp<cir::ConstantOp>() will return a null value without asserting if the defining op is not a cir::ConstantOp. That shouldn't happen, but it might make this tricky to debug if it does happen. I'd suggest rewriting this so that you can add an assert before calling getIntValue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// Convert F16 halfs to floats.
static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
mlir::Location loc,
const StringRef str,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you need this parameter, and the way you're using it seems to be incorrect.

Copy link
Contributor Author

@Priyanshu3820 Priyanshu3820 Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced with the respective intrinsic names

if (ops.size() == 4 &&
ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
4) {
return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe any of your test cases hit this case. If you get here, you need to emit a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 and the value you're passing in as str won't do that.

These test cases should get here (copied from clang/test/CodeGen/X86/avx512f-builtins.c)

__m512 test_mm512_cvt_roundph_ps(__m256i __A)
{
    // CHECK-LABEL: test_mm512_cvt_roundph_ps
    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
    return _mm512_cvt_roundph_ps(__A, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A)
{
    // CHECK-LABEL: test_mm512_mask_cvt_roundph_ps
    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
    return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_NO_EXC);
}

__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A)
{
    // CHECK-LABEL: test_mm512_maskz_cvt_roundph_ps
    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512(
    return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_NO_EXC);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

Copilot AI review requested due to automatic review settings January 6, 2026 17:11
Copy link
Contributor

Copilot AI left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull request overview

This PR implements CIR (Clang Intermediate Representation) support for three X86 F16 (half-precision float) to float conversion builtins, contributing to the broader effort tracked in issue #167765 to implement missing X86 builtins in the CIR incubator.

  • Adds the emitX86CvtF16ToFloatExpr helper function to handle F16-to-float conversions with proper rounding mode support
  • Implements the three masked conversion builtins with support for both default rounding and explicit rounding modes via LLVM intrinsics
  • Provides comprehensive test coverage including mask, maskz, and rounding variants

Reviewed changes

Copilot reviewed 2 out of 2 changed files in this pull request and generated 1 comment.

File Description
clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c New comprehensive test file with 9 test functions covering all three builtins with mask, maskz, and rounding variants, comparing CIR, LLVM, and original CodeGen outputs
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp Implements emitX86CvtF16ToFloatExpr function (lines 449-502) to handle F16-to-float conversion logic including subvector extraction, bitcasting, fp-extension, and masking; adds switch case entries (lines 1890-1896) to route the three builtins to this implementation

💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.

return builder.createMul(loc, lhs, rhs);
}

// Convert F16 halfs to floats.
Copy link

Copilot AI Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The word "halfs" should be "halves" (the correct plural form of "half").

Suggested change
// Convert F16 halfs to floats.
// Convert F16 halves to floats.

Copilot uses AI. Check for mistakes.
Copy link
Contributor

@andykaylor andykaylor Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although Copilot's suggestion is correct in terms of grammar, I wouldn't make the suggested change. The use of halfs is awkard though. Perhaps Convert f16 half values to floats.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

assert(constOp && "Expected constant operand");
if (constOp.getIntValue().getZExtValue() != 4) {
StringRef intrinsicName;
switch (builtinID) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only the x86.avx512.mask.vcvtph2ps.512 form of this intrinsic takes a rounding mode parameter, so you don't need to switch on the builtin ID here. Note that in classic codegen EmitX86CvtF16ToFloatExpr always generates a call to Intrinsic::x86_avx512_mask_vcvtph2ps_512 here. We should do the same.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

Copy link
Contributor

@andykaylor andykaylor left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lgtm

@andykaylor andykaylor merged commit 3bdda61 into llvm:main Jan 7, 2026
10 checks passed
@llvm-ci
Copy link

llvm-ci commented Jan 7, 2026

LLVM Buildbot has detected a new failure on builder arc-builder running on arc-worker while building clang at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/26994

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM-Unit :: CAS/./CASTests/12/18' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:/buildbot/worker/arc-folder/build/unittests/CAS/./CASTests-LLVM-Unit-32526-12-18.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=18 GTEST_SHARD_INDEX=12 /buildbot/worker/arc-folder/build/unittests/CAS/./CASTests
--

Script:
--
/buildbot/worker/arc-folder/build/unittests/CAS/./CASTests --gtest_filter=CASProgramTest.MappedFileRegionArenaTest
--
Note: Google Test filter = CASProgramTest.MappedFileRegionArenaTest
[==========] Running 1 test from 1 test suite.
[----------] Global test environment set-up.
[----------] 1 test from CASProgramTest
/buildbot/worker/arc-folder/llvm-project/llvm/unittests/CAS/ProgramTest.cpp:147: Failure
Value of: PI.ReturnCode == 0
  Actual: false
Expected: true


/buildbot/worker/arc-folder/llvm-project/llvm/unittests/CAS/ProgramTest.cpp:147
Value of: PI.ReturnCode == 0
  Actual: false
Expected: true



********************


@llvm-ci
Copy link

llvm-ci commented Jan 7, 2026

LLVM Buildbot has detected a new failure on builder lldb-arm-ubuntu running on linaro-lldb-arm-ubuntu while building clang at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/23836

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: tools/lldb-dap/exception/cpp/TestDAP_exception_cpp.py (1243 of 2637)
PASS: lldb-api :: tools/lldb-dap/instruction-breakpoint/TestDAP_instruction_breakpoint.py (1244 of 2637)
PASS: lldb-api :: tools/lldb-dap/invalidated-event/TestDAP_invalidatedEvent.py (1245 of 2637)
PASS: lldb-api :: tools/lldb-dap/io/TestDAP_io.py (1246 of 2637)
PASS: lldb-api :: tools/lldb-dap/locations/TestDAP_locations.py (1247 of 2637)
PASS: lldb-api :: tools/lldb-dap/cancel/TestDAP_cancel.py (1248 of 2637)
PASS: lldb-api :: tools/lldb-dap/evaluate/TestDAP_evaluate.py (1249 of 2637)
PASS: lldb-api :: tools/lldb-dap/memory/TestDAP_memory.py (1250 of 2637)
PASS: lldb-api :: tools/lldb-dap/moduleSymbols/TestDAP_moduleSymbols.py (1251 of 2637)
PASS: lldb-api :: tools/lldb-dap/optimized/TestDAP_optimized.py (1252 of 2637)
FAIL: lldb-api :: tools/lldb-dap/output/TestDAP_output.py (1253 of 2637)
******************** TEST 'lldb-api :: tools/lldb-dap/output/TestDAP_output.py' FAILED ********************
Script:
--
/usr/bin/python3.10 /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib --env LLVM_INCLUDE_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/include --env LLVM_TOOLS_DIR=/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --arch armv8l --build-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex --lldb-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/lldb --compiler /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/clang --dsymutil /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./bin --lldb-obj-root /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/tools/lldb --lldb-libs-dir /home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/./lib --cmake-build-type Release /home/tcwg-buildbot/worker/lldb-arm-ubuntu/llvm-project/lldb/test/API/tools/lldb-dap/output -p TestDAP_output.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 3bdda61f8cdbab6adca9a6b579c14f579c299974)
  clang revision 3bdda61f8cdbab6adca9a6b579c14f579c299974
  llvm revision 3bdda61f8cdbab6adca9a6b579c14f579c299974
Skipping the following test categories: libc++, msvcstl, dsym, pdb, gmodules, debugserver, objc

--
Command Output (stderr):
--
========= DEBUG ADAPTER PROTOCOL LOGS =========
[01:10:14.505] (stdio) --> {"command":"initialize","type":"request","arguments":{"adapterID":"lldb-native","clientID":"vscode","columnsStartAt1":true,"linesStartAt1":true,"locale":"en-us","pathFormat":"path","supportsRunInTerminalRequest":true,"supportsVariablePaging":true,"supportsVariableType":true,"supportsStartDebuggingRequest":true,"supportsProgressReporting":true,"supportsInvalidatedEvent":true,"supportsMemoryEvent":true,"$__lldb_sourceInitFile":false},"seq":1}
[01:10:14.505] DAP.cpp:1002 (stdio) queued (command=initialize seq=1)
[01:10:14.509] (stdio) <-- {"body":{"$__lldb_version":"lldb version 22.0.0git (https://github.com/llvm/llvm-project.git revision 3bdda61f8cdbab6adca9a6b579c14f579c299974)\n  clang revision 3bdda61f8cdbab6adca9a6b579c14f579c299974\n  llvm revision 3bdda61f8cdbab6adca9a6b579c14f579c299974","completionTriggerCharacters":["."," ","\t"],"exceptionBreakpointFilters":[{"description":"C++ Catch","filter":"cpp_catch","label":"C++ Catch","supportsCondition":true},{"description":"C++ Throw","filter":"cpp_throw","label":"C++ Throw","supportsCondition":true},{"description":"Objective-C Catch","filter":"objc_catch","label":"Objective-C Catch","supportsCondition":true},{"description":"Objective-C Throw","filter":"objc_throw","label":"Objective-C Throw","supportsCondition":true}],"supportTerminateDebuggee":true,"supportsBreakpointLocationsRequest":true,"supportsCancelRequest":true,"supportsCompletionsRequest":true,"supportsConditionalBreakpoints":true,"supportsConfigurationDoneRequest":true,"supportsDataBreakpointBytes":true,"supportsDataBreakpoints":true,"supportsDelayedStackTraceLoading":true,"supportsDisassembleRequest":true,"supportsEvaluateForHovers":true,"supportsExceptionFilterOptions":true,"supportsExceptionInfoRequest":true,"supportsFunctionBreakpoints":true,"supportsHitConditionalBreakpoints":true,"supportsInstructionBreakpoints":true,"supportsLogPoints":true,"supportsModuleSymbolsRequest":true,"supportsModulesRequest":true,"supportsReadMemoryRequest":true,"supportsSetVariable":true,"supportsSteppingGranularity":true,"supportsValueFormattingOptions":true,"supportsWriteMemoryRequest":true},"command":"initialize","request_seq":1,"seq":1,"success":true,"type":"response"}
[01:10:14.510] (stdio) --> {"command":"launch","type":"request","arguments":{"program":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/output/TestDAP_output.test_output/a.out","initCommands":["settings clear --all","settings set symbols.enable-external-lookup false","settings set target.inherit-tcc true","settings set target.disable-aslr false","settings set target.detach-on-error false","settings set target.auto-apply-fixits false","settings set plugin.process.gdb-remote.packet-timeout 60","settings set symbols.clang-modules-cache-path \"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api\"","settings set use-color false","settings set show-statusline false"],"exitCommands":["?script print('out\\0\\0', end='\\r\\n', file=sys.stdout)","?script print('err\\0\\0', end='\\r\\n', file=sys.stderr)"],"disableASLR":false,"enableAutoVariableSummaries":false,"enableSyntheticChildDebugging":false,"displayExtendedBacktrace":false},"seq":2}
[01:10:14.510] DAP.cpp:1002 (stdio) queued (command=launch seq=2)
[01:10:14.513] (stdio) <-- {"body":{"category":"console","output":"Running initCommands:\n"},"event":"output","seq":2,"type":"event"}
[01:10:14.513] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings clear --all\n"},"event":"output","seq":3,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set symbols.enable-external-lookup false\n"},"event":"output","seq":4,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.inherit-tcc true\n"},"event":"output","seq":5,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.disable-aslr false\n"},"event":"output","seq":6,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.detach-on-error false\n"},"event":"output","seq":7,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set target.auto-apply-fixits false\n"},"event":"output","seq":8,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set plugin.process.gdb-remote.packet-timeout 60\n"},"event":"output","seq":9,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set symbols.clang-modules-cache-path \"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/module-cache-lldb/lldb-api\"\n"},"event":"output","seq":10,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set use-color false\n"},"event":"output","seq":11,"type":"event"}
[01:10:14.514] (stdio) <-- {"body":{"category":"console","output":"(lldb) settings set show-statusline false\n"},"event":"output","seq":12,"type":"event"}
[01:10:15.255] (stdio) <-- {"event":"initialized","seq":13,"type":"event"}
[01:10:15.258] (stdio) <-- {"body":{"module":{"addressRange":"0xf26d8000","debugInfoSize":"983.3KB","id":"C8C14F49-271D-D5EC-D97C-F1A70B997423-FCAC2BC1","name":"ld-linux-armhf.so.3","path":"/usr/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3","symbolFilePath":"/usr/lib/arm-linux-gnueabihf/ld-linux-armhf.so.3","symbolStatus":"Symbols loaded."},"reason":"new"},"event":"module","seq":14,"type":"event"}
[01:10:15.260] (stdio) <-- {"body":{"module":{"addressRange":"0x1590000","debugInfoSize":"1.2KB","id":"A42661FB","name":"a.out","path":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/output/TestDAP_output.test_output/a.out","symbolFilePath":"/home/tcwg-buildbot/worker/lldb-arm-ubuntu/build/lldb-test-build.noindex/tools/lldb-dap/output/TestDAP_output.test_output/a.out","symbolStatus":"Symbols loaded."},"reason":"new"},"event":"module","seq":15,"type":"event"}
[01:10:15.260] (stdio) --> {"command":"setBreakpoints","type":"request","arguments":{"source":{"path":"main.c"},"sourceModified":false,"lines":[10],"breakpoints":[{"line":10}]},"seq":3}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

clang Clang issues not falling into any other category ClangIR Anything related to the ClangIR project

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants