From 21e58286204a47fb72470e7a1598e1d649ddfcba Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 14:21:53 +0000
Subject: [PATCH 01/20] Implement handling for F16 halfs to floats conversion
 builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  56 +++++-
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 185 ++++++++++++++++++
 2 files changed, 240 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..07f915b51ad6d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,13 +14,20 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
+#include "mlir/IR/Value.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cassert>
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -362,6 +369,45 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
+// Lower a cvtph2ps builtin call: widen packed halfs (passed as i16) to floats.
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+                                            mlir::Location loc,
+                                            const StringRef str,
+                                            llvm::ArrayRef<mlir::Value> ops,
+                                            mlir::Type dstTy) {
+  assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
+         "Unknown cvtph2ps intrinsic");
+  // If the SAE intrinsic doesn't use default rounding (4th operand != 4) then
+  // we can't upgrade; call the intrinsic named by `str` with the same operands.
+  if (ops.size() == 4 &&
+      ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
+          4) {
+    return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+  }
+
+  unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value src = ops[0];
+
+  // Element counts differ: take elements 0-3 of the source as a subvector.
+  if (numElts != cast<cir::VectorType>(src.getType()).getSize()) {
+    assert(numElts == 4 && "Unexpected vector size");
+    src = builder.createVecShuffle(loc, src, {0, 1, 2, 3});
+  }
+
+  // Bitcast from vXi16 to vXf16.
+  cir::VectorType halfTy = cir::VectorType::get(
+      cir::FP16Type::get(builder.getContext()), numElts);
+
+  src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
+
+  // Perform the fp-extension to the destination vector type.
+  mlir::Value res = builder.createCast(cir::CastKind::floating, src, dstTy);
+  // Masked forms: select between res and ops[1] under the mask in ops[2].
+  if (ops.size() >= 3)
+    res = emitX86Select(builder, loc, ops[2], res, ops[1]);
+  return res;
+}
+
 static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
                                 llvm::SmallVector<mlir::Value> ops,
                                 bool isSigned) {
@@ -1662,9 +1708,17 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_vcvtph2ps_mask:
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+    // Only the 4-operand (512-bit SAE) form can fall back to the intrinsic.
+    return emitX86CvtF16ToFloatExpr(builder, getLoc(expr->getExprLoc()),
+                                    "x86.avx512.mask.vcvtph2ps.512", ops,
+                                    convertType(expr->getType()));
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
new file mode 100644
index 0000000000000..ee42f5de48d98
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -0,0 +1,185 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512fp16 -target-feature +avx512f -target-feature +avx512vl -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
+    // CIR-LABEL: test_vcvtph2ps_mask
+    // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+    // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+    // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+    // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+    // LLVM-LABEL: @test_vcvtph2ps_mask
+    // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+    // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+    // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+    // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+    // LLVM: ret <4 x float> {{.*}}
+
+    // OGCG-LABEL: @test_vcvtph2ps_mask
+    // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+    // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+    // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+    // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+    // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+    // OGCG: ret <4 x float> {{.*}}
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
+}
+
+__m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
+  // CIR-LABEL: test_vcvtph2ps256_mask
+  // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+
+  // LLVM-LABEL: @test_vcvtph2ps256_mask
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+  // LLVM: ret <8 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps256_mask
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
+  // OGCG: ret <8 x float> {{.*}}
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
+}
+
+__m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
+  // CIR-LABEL: test_vcvtph2ps512_mask
+  // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
+  // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+
+  // LLVM-LABEL: @test_vcvtph2ps512_mask
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+  // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+  // LLVM: ret <16 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps512_mask
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
+  // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
+  // OGCG: ret <16 x float> {{.*}}
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
+}
+
+__m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+  // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+  // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+
+  // LLVM-LABEL: @test_vcvtph2ps_maskz
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+  // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+  // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // LLVM: ret <4 x float> {{.*}}
+
+  // OGCG-LABEL: @test_vcvtph2ps_maskz
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
+  // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
+  // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // OGCG: ret <4 x float> {{.*}}
+
+  return _mm_maskz_cvtph_ps(k, a);
+}
+
+__m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+  // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
+
+
+  // LLVM-LABEL: @test_vcvtph2ps256_maskz
+  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+  // LLVM: ret <8 x float> {{.*}} 
+
+  // OGCG-LABEL: @test_vcvtph2ps256_maskz
+  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
+  // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+  // OGCG: ret <8 x float> {{.*}}
+   return _mm256_maskz_cvtph_ps(k, a);
+}
+
+__m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
+  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+  // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+
+  // LLVM-LABEL: @test_vcvtph2ps512_maskz
+  // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+  // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // LLVM: ret <16 x float> {{.*}}
+  
+  // OGCG-LABEL: @test_vcvtph2ps512_maskz
+  // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
+  // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
+  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // OGCG: ret <16 x float> {{.*}}
+  return _mm512_maskz_cvtph_ps(k, a);
+}

From b73200cc338b40a38999ccbdeb174e45c9e9fff2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 14:30:42 +0000
Subject: [PATCH 02/20] Remove unwanted headers included by clangd

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 07f915b51ad6d..9ecec9d615bc4 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,20 +14,13 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
-#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
-#include "mlir/IR/Types.h"
-#include "mlir/IR/Value.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
-#include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <cassert>
 
 using namespace clang;
 using namespace clang::CIRGen;

From a72461410892a3561c0cede6cd564a266f507eb4 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 14:33:48 +0000
Subject: [PATCH 03/20] Fix formatting

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 9ecec9d615bc4..b39a4e683385d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -388,8 +388,8 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
   }
 
   // Bitcast from vXi16 to vXf16.
-  cir::VectorType halfTy = cir::VectorType::get(
-      cir::FP16Type::get(builder.getContext()), numElts);
+  cir::VectorType halfTy =
+      cir::VectorType::get(cir::FP16Type::get(builder.getContext()), numElts);
 
   src = builder.createCast(cir::CastKind::bitcast, src, halfTy);
 

From b29a415f3dbfb072c3b29e0c219f9dd1f3e3dbec Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 14:34:03 +0000
Subject: [PATCH 04/20] Add CIR output file avx512f16c-builtins.cir

---
 .../X86/avx512f16c-builtins.cir               | 393 ++++++++++++++++++
 1 file changed, 393 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
new file mode 100644
index 0000000000000..9364d531b3585
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
@@ -0,0 +1,393 @@
+!s16i = !cir.int<s, 16>
+!s32i = !cir.int<s, 32>
+!s64i = !cir.int<s, 64>
+!u16i = !cir.int<u, 16>
+!u8i = !cir.int<u, 8>
+#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28)
+#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36)
+#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39)
+#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46)
+#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51)
+#loc8 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60)
+#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31)
+#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39)
+#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42)
+#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49)
+#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54)
+#loc23 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63)
+#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31)
+#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39)
+#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42)
+#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49)
+#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54)
+#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64)
+#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21)
+#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30)
+#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35)
+#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43)
+#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29)
+#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37)
+#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40)
+#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49)
+#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24)
+#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33)
+#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38)
+#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46)
+#loc101 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32)
+#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40)
+#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43)
+#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52)
+#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24)
+#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34)
+#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39)
+#loc123 = loc("./lib/clang/22/include/avx512fintrin.h":3584:47)
+#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32)
+#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40)
+#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43)
+#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53)
+#loc145 = loc(fused[#loc3, #loc4])
+#loc146 = loc(fused[#loc5, #loc6])
+#loc147 = loc(fused[#loc7, #loc8])
+#loc150 = loc(fused[#loc18, #loc19])
+#loc151 = loc(fused[#loc20, #loc21])
+#loc152 = loc(fused[#loc22, #loc23])
+#loc155 = loc(fused[#loc33, #loc34])
+#loc156 = loc(fused[#loc35, #loc36])
+#loc157 = loc(fused[#loc37, #loc38])
+#loc164 = loc(fused[#loc56, #loc57])
+#loc165 = loc(fused[#loc58, #loc59])
+#loc168 = loc(fused[#loc69, #loc70])
+#loc169 = loc(fused[#loc71, #loc72])
+#loc176 = loc(fused[#loc88, #loc89])
+#loc177 = loc(fused[#loc90, #loc91])
+#loc180 = loc(fused[#loc101, #loc102])
+#loc181 = loc(fused[#loc103, #loc104])
+#loc188 = loc(fused[#loc120, #loc121])
+#loc189 = loc(fused[#loc122, #loc123])
+#loc192 = loc(fused[#loc134, #loc135])
+#loc193 = loc(fused[#loc136, #loc137])
+module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang<c>, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
+  cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> {
+    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc145)
+    %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["src", init] {alignment = 16 : i64} loc(#loc146)
+    %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc147)
+    %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc2)
+    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
+    cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc9)
+    cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc9)
+    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc10)
+    %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10)
+    %6 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc11)
+    %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc12)
+    %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13)
+    %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13)
+    %10 = cir.cast bitcast %9 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13)
+    %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13)
+    %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12)
+    %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13)
+    %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13)
+    cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc148)
+    %15 = cir.load %3 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc148)
+    cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148)
+  } loc(#loc144)
+  cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> {
+    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc150)
+    %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["src", init] {alignment = 32 : i64} loc(#loc151)
+    %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc152)
+    %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc17)
+    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
+    cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc24)
+    cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc24)
+    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc25)
+    %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25)
+    %6 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc26)
+    %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc27)
+    %8 = cir.cast bitcast %5 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25)
+    %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25)
+    %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27)
+    %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28)
+    cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc153)
+    %12 = cir.load %3 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc153)
+    cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153)
+  } loc(#loc149)
+  cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> {
+    %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc155)
+    %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["src", init] {alignment = 64 : i64} loc(#loc156)
+    %2 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc157)
+    %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc32)
+    cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc39)
+    cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc39)
+    cir.store %arg2, %2 : !u16i, !cir.ptr<!u16i> loc(#loc39)
+    %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc40)
+    %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40)
+    %6 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc41)
+    %7 = cir.load align(2) %2 : !cir.ptr<!u16i>, !u16i loc(#loc42)
+    %8 = cir.const #cir.int<4> : !s32i loc(#loc43)
+    %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40)
+    %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc40)
+    %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42)
+    %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44)
+    cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc158)
+    %13 = cir.load %3 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc158)
+    cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158)
+  } loc(#loc154)
+  cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> {
+    %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc48)
+    %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160)
+    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161)
+    cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc52)
+    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc49)
+    cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc162)
+    %4 = cir.load %0 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc162)
+    cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162)
+  } loc(#loc159)
+  cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> {
+    %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc164)
+    %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc165)
+    %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc55)
+    cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc60)
+    cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc60)
+    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc61)
+    %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61)
+    %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62)
+    %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc63)
+    %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64)
+    %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64)
+    %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64)
+    %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64)
+    %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63)
+    %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64)
+    %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64)
+    cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc166)
+    %14 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc166)
+    cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166)
+  } loc(#loc163)
+  cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> {
+    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc168)
+    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc169)
+    %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc68)
+    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc73)
+    cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc73)
+    %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc74)
+    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc75)
+    %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76)
+    cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc170)
+    %6 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc170)
+    cir.return %6 : !cir.vector<4 x !cir.float> loc(#loc170)
+  } loc(#loc167)
+  cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> {
+    %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc80)
+    %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172)
+    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173)
+    cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc84)
+    %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc81)
+    cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc174)
+    %4 = cir.load %0 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc174)
+    cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174)
+  } loc(#loc171)
+  cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> {
+    %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc176)
+    %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc177)
+    %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc87)
+    cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc92)
+    cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc92)
+    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc93)
+    %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93)
+    %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94)
+    %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc95)
+    %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93)
+    %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93)
+    %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95)
+    %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96)
+    cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc178)
+    %11 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc178)
+    cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178)
+  } loc(#loc175)
+  cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> {
+    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc180)
+    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc181)
+    %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc100)
+    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc105)
+    cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc105)
+    %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc106)
+    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc107)
+    %5 = cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108)
+    cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc182)
+    %6 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc182)
+    cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182)
+  } loc(#loc179)
+  cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> {
+    %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc112)
+    %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184)
+    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185)
+    cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc116)
+    %3 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc113)
+    cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc186)
+    %4 = cir.load %0 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc186)
+    cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186)
+  } loc(#loc183)
+  cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> {
+    %0 = cir.alloca !u16i, !cir.ptr<!u16i>, ["__U", init] {alignment = 2 : i64} loc(#loc188)
+    %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["__A", init] {alignment = 32 : i64} loc(#loc189)
+    %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc119)
+    cir.store %arg0, %0 : !u16i, !cir.ptr<!u16i> loc(#loc124)
+    cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc124)
+    %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc125)
+    %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125)
+    %5 = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126)
+    %6 = cir.load align(2) %0 : !cir.ptr<!u16i>, !u16i loc(#loc127)
+    %7 = cir.const #cir.int<4> : !s32i loc(#loc128)
+    %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125)
+    %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125)
+    %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127)
+    %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129)
+    cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc190)
+    %12 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc190)
+    cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190)
+  } loc(#loc187)
+  cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> {
+    %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc192)
+    %1 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc193)
+    %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc133)
+    cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc138)
+    cir.store %arg1, %1 : !u16i, !cir.ptr<!u16i> loc(#loc138)
+    %3 = cir.load align(2) %1 : !cir.ptr<!u16i>, !u16i loc(#loc139)
+    %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc140)
+    %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141)
+    cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc194)
+    %6 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc194)
+    cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194)
+  } loc(#loc191)
+} loc(#loc)
+#loc = loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0)
+#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1)
+#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1)
+#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63)
+#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48)
+#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51)
+#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56)
+#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10)
+#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3)
+#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57)
+#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1)
+#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1)
+#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66)
+#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51)
+#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54)
+#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59)
+#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10)
+#loc29 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3)
+#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60)
+#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1)
+#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1)
+#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67)
+#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52)
+#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55)
+#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60)
+#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63)
+#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10)
+#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3)
+#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64)
+#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1)
+#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1)
+#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24)
+#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57)
+#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32)
+#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1)
+#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3)
+#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1)
+#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1)
+#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1)
+#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59)
+#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14)
+#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25)
+#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19)
+#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3)
+#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28)
+#loc67 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1)
+#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1)
+#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52)
+#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29)
+#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32)
+#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10)
+#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3)
+#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33)
+#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1)
+#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1)
+#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24)
+#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81)
+#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32)
+#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53)
+#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3)
+#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1)
+#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1)
+#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1)
+#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62)
+#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17)
+#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28)
+#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19)
+#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3)
+#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31)
+#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1)
+#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1)
+#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55)
+#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33)
+#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36)
+#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11)
+#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4)
+#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37)
+#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1)
+#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1)
+#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23)
+#loc114 = loc("./lib/clang/22/include/avx512fintrin.h":261:78)
+#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31)
+#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56)
+#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3)
+#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1)
+#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1)
+#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1)
+#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63)
+#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28)
+#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30)
+#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18)
+#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19)
+#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3)
+#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42)
+#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1)
+#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1)
+#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56)
+#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32)
+#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35)
+#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10)
+#loc142 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3)
+#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36)
+#loc144 = loc(fused[#loc1, #loc2])
+#loc148 = loc(fused[#loc14, #loc15])
+#loc149 = loc(fused[#loc16, #loc17])
+#loc153 = loc(fused[#loc29, #loc30])
+#loc154 = loc(fused[#loc31, #loc32])
+#loc158 = loc(fused[#loc45, #loc46])
+#loc159 = loc(fused[#loc47, #loc48])
+#loc160 = loc(fused[#loc49, #loc50])
+#loc161 = loc(fused[#loc51, #loc50])
+#loc162 = loc(fused[#loc53, #loc50])
+#loc163 = loc(fused[#loc54, #loc55])
+#loc166 = loc(fused[#loc65, #loc66])
+#loc167 = loc(fused[#loc67, #loc68])
+#loc170 = loc(fused[#loc77, #loc78])
+#loc171 = loc(fused[#loc79, #loc80])
+#loc172 = loc(fused[#loc81, #loc82])
+#loc173 = loc(fused[#loc83, #loc82])
+#loc174 = loc(fused[#loc85, #loc82])
+#loc175 = loc(fused[#loc86, #loc87])
+#loc178 = loc(fused[#loc97, #loc98])
+#loc179 = loc(fused[#loc99, #loc100])
+#loc182 = loc(fused[#loc109, #loc110])
+#loc183 = loc(fused[#loc111, #loc112])
+#loc184 = loc(fused[#loc113, #loc114])
+#loc185 = loc(fused[#loc115, #loc114])
+#loc186 = loc(fused[#loc117, #loc114])
+#loc187 = loc(fused[#loc118, #loc119])
+#loc190 = loc(fused[#loc130, #loc131])
+#loc191 = loc(fused[#loc132, #loc133])
+#loc194 = loc(fused[#loc142, #loc143])

From 284c8750554ef09d2f919e30ef2c82d8289ef493 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 20:29:57 +0530
Subject: [PATCH 05/20] Delete
 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir

---
 .../X86/avx512f16c-builtins.cir               | 393 ------------------
 1 file changed, 393 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
deleted file mode 100644
index 9364d531b3585..0000000000000
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.cir
+++ /dev/null
@@ -1,393 +0,0 @@
-!s16i = !cir.int<s, 16>
-!s32i = !cir.int<s, 32>
-!s64i = !cir.int<s, 64>
-!u16i = !cir.int<u, 16>
-!u8i = !cir.int<u, 8>
-#loc3 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:28)
-#loc4 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:36)
-#loc5 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:39)
-#loc6 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:46)
-#loc7 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:51)
-#loc8 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:60)
-#loc18 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:31)
-#loc19 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:39)
-#loc20 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:42)
-#loc21 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:49)
-#loc22 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:54)
-#loc23 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:63)
-#loc33 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:31)
-#loc34 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:39)
-#loc35 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:42)
-#loc36 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:49)
-#loc37 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:54)
-#loc38 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:64)
-#loc56 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:21)
-#loc57 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:30)
-#loc58 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:35)
-#loc59 = loc("./lib/clang/22/include/avx512vlintrin.h":8027:43)
-#loc69 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:29)
-#loc70 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:37)
-#loc71 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:40)
-#loc72 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:49)
-#loc88 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:24)
-#loc89 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:33)
-#loc90 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:38)
-#loc91 = loc("./lib/clang/22/include/avx512vlintrin.h":8044:46)
-#loc101 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:32)
-#loc102 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:40)
-#loc103 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:43)
-#loc104 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:52)
-#loc120 = loc("./lib/clang/22/include/avx512fintrin.h":3584:24)
-#loc121 = loc("./lib/clang/22/include/avx512fintrin.h":3584:34)
-#loc122 = loc("./lib/clang/22/include/avx512fintrin.h":3584:39)
-#loc123 = loc("./lib/clang/22/include/avx512fintrin.h":3584:47)
-#loc134 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:32)
-#loc135 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:40)
-#loc136 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:43)
-#loc137 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:53)
-#loc145 = loc(fused[#loc3, #loc4])
-#loc146 = loc(fused[#loc5, #loc6])
-#loc147 = loc(fused[#loc7, #loc8])
-#loc150 = loc(fused[#loc18, #loc19])
-#loc151 = loc(fused[#loc20, #loc21])
-#loc152 = loc(fused[#loc22, #loc23])
-#loc155 = loc(fused[#loc33, #loc34])
-#loc156 = loc(fused[#loc35, #loc36])
-#loc157 = loc(fused[#loc37, #loc38])
-#loc164 = loc(fused[#loc56, #loc57])
-#loc165 = loc(fused[#loc58, #loc59])
-#loc168 = loc(fused[#loc69, #loc70])
-#loc169 = loc(fused[#loc71, #loc72])
-#loc176 = loc(fused[#loc88, #loc89])
-#loc177 = loc(fused[#loc90, #loc91])
-#loc180 = loc(fused[#loc101, #loc102])
-#loc181 = loc(fused[#loc103, #loc104])
-#loc188 = loc(fused[#loc120, #loc121])
-#loc189 = loc(fused[#loc122, #loc123])
-#loc192 = loc(fused[#loc134, #loc135])
-#loc193 = loc(fused[#loc136, #loc137])
-module @"/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c" attributes {cir.lang = #cir.lang<c>, cir.module_asm = [], cir.triple = "x86_64-unknown-linux", dlti.dl_spec = #dlti.dl_spec<!llvm.ptr<270> = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array<i32: 8, 16, 32, 64>, "dlti.stack_alignment" = 128 : i64>} {
-  cir.func no_inline dso_local @test_vcvtph2ps_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !cir.vector<4 x !cir.float> loc(fused[#loc5, #loc6]), %arg2: !u8i loc(fused[#loc7, #loc8])) -> !cir.vector<4 x !cir.float> {
-    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc145)
-    %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["src", init] {alignment = 16 : i64} loc(#loc146)
-    %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc147)
-    %3 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc2)
-    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc9)
-    cir.store %arg1, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc9)
-    cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc9)
-    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc10)
-    %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10)
-    %6 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc11)
-    %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc12)
-    %8 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc13)
-    %9 = cir.vec.shuffle(%5, %8 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc13)
-    %10 = cir.cast bitcast %9 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc13)
-    %11 = cir.cast floating %10 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc13)
-    %12 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc12)
-    %13 = cir.vec.shuffle(%12, %12 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc13)
-    %14 = cir.select if %13 then %11 else %6 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc13)
-    cir.store %14, %3 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc148)
-    %15 = cir.load %3 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc148)
-    cir.return %15 : !cir.vector<4 x !cir.float> loc(#loc148)
-  } loc(#loc144)
-  cir.func no_inline dso_local @test_vcvtph2ps256_mask(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !cir.vector<8 x !cir.float> loc(fused[#loc20, #loc21]), %arg2: !u8i loc(fused[#loc22, #loc23])) -> !cir.vector<8 x !cir.float> {
-    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc150)
-    %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["src", init] {alignment = 32 : i64} loc(#loc151)
-    %2 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc152)
-    %3 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc17)
-    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc24)
-    cir.store %arg1, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc24)
-    cir.store %arg2, %2 : !u8i, !cir.ptr<!u8i> loc(#loc24)
-    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc25)
-    %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc25)
-    %6 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc26)
-    %7 = cir.load align(1) %2 : !cir.ptr<!u8i>, !u8i loc(#loc27)
-    %8 = cir.cast bitcast %5 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc25)
-    %9 = cir.cast floating %8 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc25)
-    %10 = cir.cast bitcast %7 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc27)
-    %11 = cir.select if %10 then %9 else %6 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc28)
-    cir.store %11, %3 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc153)
-    %12 = cir.load %3 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc153)
-    cir.return %12 : !cir.vector<8 x !cir.float> loc(#loc153)
-  } loc(#loc149)
-  cir.func no_inline dso_local @test_vcvtph2ps512_mask(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc33, #loc34]), %arg1: !cir.vector<16 x !cir.float> loc(fused[#loc35, #loc36]), %arg2: !u16i loc(fused[#loc37, #loc38])) -> !cir.vector<16 x !cir.float> {
-    %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc155)
-    %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["src", init] {alignment = 64 : i64} loc(#loc156)
-    %2 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc157)
-    %3 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc32)
-    cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc39)
-    cir.store %arg1, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc39)
-    cir.store %arg2, %2 : !u16i, !cir.ptr<!u16i> loc(#loc39)
-    %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc40)
-    %5 = cir.cast bitcast %4 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc40)
-    %6 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc41)
-    %7 = cir.load align(2) %2 : !cir.ptr<!u16i>, !u16i loc(#loc42)
-    %8 = cir.const #cir.int<4> : !s32i loc(#loc43)
-    %9 = cir.cast bitcast %5 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc40)
-    %10 = cir.cast floating %9 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc40)
-    %11 = cir.cast bitcast %7 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc42)
-    %12 = cir.select if %11 then %10 else %6 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc44)
-    cir.store %12, %3 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc158)
-    %13 = cir.load %3 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc158)
-    cir.return %13 : !cir.vector<16 x !cir.float> loc(#loc158)
-  } loc(#loc154)
-  cir.func always_inline internal private dso_local @_mm_setzero_ps() -> !cir.vector<4 x !cir.float> {
-    %0 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc48)
-    %1 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, [".compoundliteral"] {alignment = 16 : i64} loc(#loc160)
-    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<4 x !cir.float> loc(#loc161)
-    cir.store align(16) %2, %1 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc52)
-    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc49)
-    cir.store %3, %0 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc162)
-    %4 = cir.load %0 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc162)
-    cir.return %4 : !cir.vector<4 x !cir.float> loc(#loc162)
-  } loc(#loc159)
-  cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc56, #loc57]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc58, #loc59])) -> !cir.vector<4 x !cir.float> {
-    %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc164)
-    %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc165)
-    %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc55)
-    cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc60)
-    cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc60)
-    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc61)
-    %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc61)
-    %5 = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float> loc(#loc62)
-    %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc63)
-    %7 = cir.const #cir.poison : !cir.vector<8 x !s16i> loc(#loc64)
-    %8 = cir.vec.shuffle(%4, %7 : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i> loc(#loc64)
-    %9 = cir.cast bitcast %8 : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16> loc(#loc64)
-    %10 = cir.cast floating %9 : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float> loc(#loc64)
-    %11 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc63)
-    %12 = cir.vec.shuffle(%11, %11 : !cir.vector<8 x !cir.bool>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.bool> loc(#loc64)
-    %13 = cir.select if %12 then %10 else %5 : (!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float> loc(#loc64)
-    cir.store %13, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc166)
-    %14 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc166)
-    cir.return %14 : !cir.vector<4 x !cir.float> loc(#loc166)
-  } loc(#loc163)
-  cir.func no_inline dso_local @test_vcvtph2ps_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc69, #loc70]), %arg1: !u8i loc(fused[#loc71, #loc72])) -> !cir.vector<4 x !cir.float> {
-    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc168)
-    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc169)
-    %2 = cir.alloca !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>>, ["__retval"] {alignment = 16 : i64} loc(#loc68)
-    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc73)
-    cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc73)
-    %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc74)
-    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc75)
-    %5 = cir.call @_mm_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<4 x !cir.float> loc(#loc76)
-    cir.store %5, %2 : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> loc(#loc170)
-    %6 = cir.load %2 : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float> loc(#loc170)
-    cir.return %6 : !cir.vector<4 x !cir.float> loc(#loc170)
-  } loc(#loc167)
-  cir.func always_inline internal private dso_local @_mm256_setzero_ps() -> !cir.vector<8 x !cir.float> {
-    %0 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc80)
-    %1 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, [".compoundliteral"] {alignment = 32 : i64} loc(#loc172)
-    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<8 x !cir.float> loc(#loc173)
-    cir.store align(32) %2, %1 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc84)
-    %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc81)
-    cir.store %3, %0 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc174)
-    %4 = cir.load %0 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc174)
-    cir.return %4 : !cir.vector<8 x !cir.float> loc(#loc174)
-  } loc(#loc171)
-  cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps(%arg0: !u8i loc(fused[#loc88, #loc89]), %arg1: !cir.vector<2 x !s64i> loc(fused[#loc90, #loc91])) -> !cir.vector<8 x !cir.float> {
-    %0 = cir.alloca !u8i, !cir.ptr<!u8i>, ["__U", init] {alignment = 1 : i64} loc(#loc176)
-    %1 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["__A", init] {alignment = 16 : i64} loc(#loc177)
-    %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc87)
-    cir.store %arg0, %0 : !u8i, !cir.ptr<!u8i> loc(#loc92)
-    cir.store %arg1, %1 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc92)
-    %3 = cir.load align(16) %1 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc93)
-    %4 = cir.cast bitcast %3 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc93)
-    %5 = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float> loc(#loc94)
-    %6 = cir.load align(1) %0 : !cir.ptr<!u8i>, !u8i loc(#loc95)
-    %7 = cir.cast bitcast %4 : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16> loc(#loc93)
-    %8 = cir.cast floating %7 : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float> loc(#loc93)
-    %9 = cir.cast bitcast %6 : !u8i -> !cir.vector<8 x !cir.bool> loc(#loc95)
-    %10 = cir.select if %9 then %8 else %5 : (!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float> loc(#loc96)
-    cir.store %10, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc178)
-    %11 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc178)
-    cir.return %11 : !cir.vector<8 x !cir.float> loc(#loc178)
-  } loc(#loc175)
-  cir.func no_inline dso_local @test_vcvtph2ps256_maskz(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc101, #loc102]), %arg1: !u8i loc(fused[#loc103, #loc104])) -> !cir.vector<8 x !cir.float> {
-    %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>>, ["a", init] {alignment = 16 : i64} loc(#loc180)
-    %1 = cir.alloca !u8i, !cir.ptr<!u8i>, ["k", init] {alignment = 1 : i64} loc(#loc181)
-    %2 = cir.alloca !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>>, ["__retval"] {alignment = 32 : i64} loc(#loc100)
-    cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr<!cir.vector<2 x !s64i>> loc(#loc105)
-    cir.store %arg1, %1 : !u8i, !cir.ptr<!u8i> loc(#loc105)
-    %3 = cir.load align(1) %1 : !cir.ptr<!u8i>, !u8i loc(#loc106)
-    %4 = cir.load align(16) %0 : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i> loc(#loc107)
-    %5 = cir.call @_mm256_maskz_cvtph_ps(%3, %4) : (!u8i, !cir.vector<2 x !s64i>) -> !cir.vector<8 x !cir.float> loc(#loc108)
-    cir.store %5, %2 : !cir.vector<8 x !cir.float>, !cir.ptr<!cir.vector<8 x !cir.float>> loc(#loc182)
-    %6 = cir.load %2 : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float> loc(#loc182)
-    cir.return %6 : !cir.vector<8 x !cir.float> loc(#loc182)
-  } loc(#loc179)
-  cir.func always_inline internal private dso_local @_mm512_setzero_ps() -> !cir.vector<16 x !cir.float> {
-    %0 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc112)
-    %1 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, [".compoundliteral"] {alignment = 64 : i64} loc(#loc184)
-    %2 = cir.const #cir.const_vector<[#cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float, #cir.fp<0.000000e+00> : !cir.float]> : !cir.vector<16 x !cir.float> loc(#loc185)
-    cir.store align(64) %2, %1 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc116)
-    %3 = cir.load align(64) %1 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc113)
-    cir.store %3, %0 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc186)
-    %4 = cir.load %0 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc186)
-    cir.return %4 : !cir.vector<16 x !cir.float> loc(#loc186)
-  } loc(#loc183)
-  cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps(%arg0: !u16i loc(fused[#loc120, #loc121]), %arg1: !cir.vector<4 x !s64i> loc(fused[#loc122, #loc123])) -> !cir.vector<16 x !cir.float> {
-    %0 = cir.alloca !u16i, !cir.ptr<!u16i>, ["__U", init] {alignment = 2 : i64} loc(#loc188)
-    %1 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["__A", init] {alignment = 32 : i64} loc(#loc189)
-    %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc119)
-    cir.store %arg0, %0 : !u16i, !cir.ptr<!u16i> loc(#loc124)
-    cir.store %arg1, %1 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc124)
-    %3 = cir.load align(32) %1 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc125)
-    %4 = cir.cast bitcast %3 : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i> loc(#loc125)
-    %5 = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float> loc(#loc126)
-    %6 = cir.load align(2) %0 : !cir.ptr<!u16i>, !u16i loc(#loc127)
-    %7 = cir.const #cir.int<4> : !s32i loc(#loc128)
-    %8 = cir.cast bitcast %4 : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16> loc(#loc125)
-    %9 = cir.cast floating %8 : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float> loc(#loc125)
-    %10 = cir.cast bitcast %6 : !u16i -> !cir.vector<16 x !cir.bool> loc(#loc127)
-    %11 = cir.select if %10 then %9 else %5 : (!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float> loc(#loc129)
-    cir.store %11, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc190)
-    %12 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc190)
-    cir.return %12 : !cir.vector<16 x !cir.float> loc(#loc190)
-  } loc(#loc187)
-  cir.func no_inline dso_local @test_vcvtph2ps512_maskz(%arg0: !cir.vector<4 x !s64i> loc(fused[#loc134, #loc135]), %arg1: !u16i loc(fused[#loc136, #loc137])) -> !cir.vector<16 x !cir.float> {
-    %0 = cir.alloca !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>, ["a", init] {alignment = 32 : i64} loc(#loc192)
-    %1 = cir.alloca !u16i, !cir.ptr<!u16i>, ["k", init] {alignment = 2 : i64} loc(#loc193)
-    %2 = cir.alloca !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>>, ["__retval"] {alignment = 64 : i64} loc(#loc133)
-    cir.store %arg0, %0 : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>> loc(#loc138)
-    cir.store %arg1, %1 : !u16i, !cir.ptr<!u16i> loc(#loc138)
-    %3 = cir.load align(2) %1 : !cir.ptr<!u16i>, !u16i loc(#loc139)
-    %4 = cir.load align(32) %0 : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i> loc(#loc140)
-    %5 = cir.call @_mm512_maskz_cvtph_ps(%3, %4) : (!u16i, !cir.vector<4 x !s64i>) -> !cir.vector<16 x !cir.float> loc(#loc141)
-    cir.store %5, %2 : !cir.vector<16 x !cir.float>, !cir.ptr<!cir.vector<16 x !cir.float>> loc(#loc194)
-    %6 = cir.load %2 : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float> loc(#loc194)
-    cir.return %6 : !cir.vector<16 x !cir.float> loc(#loc194)
-  } loc(#loc191)
-} loc(#loc)
-#loc = loc("/home/priyanshu/llvm-project/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":0:0)
-#loc1 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:1)
-#loc2 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":36:1)
-#loc9 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":10:63)
-#loc10 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:48)
-#loc11 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:51)
-#loc12 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:56)
-#loc13 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:10)
-#loc14 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:3)
-#loc15 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":35:57)
-#loc16 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:1)
-#loc17 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":62:1)
-#loc24 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":38:66)
-#loc25 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:51)
-#loc26 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:54)
-#loc27 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:59)
-#loc28 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:10)
-#loc29 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:3)
-#loc30 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":61:60)
-#loc31 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:1)
-#loc32 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":89:1)
-#loc39 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":64:67)
-#loc40 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:52)
-#loc41 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:55)
-#loc42 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:60)
-#loc43 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:63)
-#loc44 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:10)
-#loc45 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:3)
-#loc46 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":88:64)
-#loc47 = loc("./lib/clang/22/include/xmmintrin.h":2017:1)
-#loc48 = loc("./lib/clang/22/include/xmmintrin.h":2020:1)
-#loc49 = loc("./lib/clang/22/include/xmmintrin.h":2019:24)
-#loc50 = loc("./lib/clang/22/include/xmmintrin.h":2019:57)
-#loc51 = loc("./lib/clang/22/include/xmmintrin.h":2019:32)
-#loc52 = loc("./lib/clang/22/include/xmmintrin.h":2018:1)
-#loc53 = loc("./lib/clang/22/include/xmmintrin.h":2019:3)
-#loc54 = loc("./lib/clang/22/include/avx512vlintrin.h":8026:1)
-#loc55 = loc("./lib/clang/22/include/avx512vlintrin.h":8033:1)
-#loc60 = loc("./lib/clang/22/include/avx512vlintrin.h":8028:1)
-#loc61 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:59)
-#loc62 = loc("./lib/clang/22/include/avx512vlintrin.h":8031:14)
-#loc63 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:25)
-#loc64 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:19)
-#loc65 = loc("./lib/clang/22/include/avx512vlintrin.h":8029:3)
-#loc66 = loc("./lib/clang/22/include/avx512vlintrin.h":8032:28)
-#loc67 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:1)
-#loc68 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":126:1)
-#loc73 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":91:52)
-#loc74 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:29)
-#loc75 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:32)
-#loc76 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:10)
-#loc77 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:3)
-#loc78 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":125:33)
-#loc79 = loc("./lib/clang/22/include/avxintrin.h":4291:1)
-#loc80 = loc("./lib/clang/22/include/avxintrin.h":4293:1)
-#loc81 = loc("./lib/clang/22/include/avxintrin.h":4292:24)
-#loc82 = loc("./lib/clang/22/include/avxintrin.h":4292:81)
-#loc83 = loc("./lib/clang/22/include/avxintrin.h":4292:32)
-#loc84 = loc("./lib/clang/22/include/avxintrin.h":4291:53)
-#loc85 = loc("./lib/clang/22/include/avxintrin.h":4292:3)
-#loc86 = loc("./lib/clang/22/include/avx512vlintrin.h":8043:1)
-#loc87 = loc("./lib/clang/22/include/avx512vlintrin.h":8050:1)
-#loc92 = loc("./lib/clang/22/include/avx512vlintrin.h":8045:1)
-#loc93 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:62)
-#loc94 = loc("./lib/clang/22/include/avx512vlintrin.h":8048:17)
-#loc95 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:28)
-#loc96 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:19)
-#loc97 = loc("./lib/clang/22/include/avx512vlintrin.h":8046:3)
-#loc98 = loc("./lib/clang/22/include/avx512vlintrin.h":8049:31)
-#loc99 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:1)
-#loc100 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":156:1)
-#loc105 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":128:55)
-#loc106 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:33)
-#loc107 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:36)
-#loc108 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:11)
-#loc109 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:4)
-#loc110 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":155:37)
-#loc111 = loc("./lib/clang/22/include/avx512fintrin.h":259:1)
-#loc112 = loc("./lib/clang/22/include/avx512fintrin.h":262:1)
-#loc113 = loc("./lib/clang/22/include/avx512fintrin.h":260:23)
-#loc114 = loc("./lib/clang/22/include/avx512fintrin.h":261:78)
-#loc115 = loc("./lib/clang/22/include/avx512fintrin.h":260:31)
-#loc116 = loc("./lib/clang/22/include/avx512fintrin.h":259:56)
-#loc117 = loc("./lib/clang/22/include/avx512fintrin.h":260:3)
-#loc118 = loc("./lib/clang/22/include/avx512fintrin.h":3583:1)
-#loc119 = loc("./lib/clang/22/include/avx512fintrin.h":3590:1)
-#loc124 = loc("./lib/clang/22/include/avx512fintrin.h":3585:1)
-#loc125 = loc("./lib/clang/22/include/avx512fintrin.h":3586:63)
-#loc126 = loc("./lib/clang/22/include/avx512fintrin.h":3587:28)
-#loc127 = loc("./lib/clang/22/include/avx512fintrin.h":3588:30)
-#loc128 = loc("./lib/clang/22/include/avx512fintrin.h":3589:18)
-#loc129 = loc("./lib/clang/22/include/avx512fintrin.h":3586:19)
-#loc130 = loc("./lib/clang/22/include/avx512fintrin.h":3586:3)
-#loc131 = loc("./lib/clang/22/include/avx512fintrin.h":3589:42)
-#loc132 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:1)
-#loc133 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":185:1)
-#loc138 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":158:56)
-#loc139 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:32)
-#loc140 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:35)
-#loc141 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:10)
-#loc142 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:3)
-#loc143 = loc("../clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c":184:36)
-#loc144 = loc(fused[#loc1, #loc2])
-#loc148 = loc(fused[#loc14, #loc15])
-#loc149 = loc(fused[#loc16, #loc17])
-#loc153 = loc(fused[#loc29, #loc30])
-#loc154 = loc(fused[#loc31, #loc32])
-#loc158 = loc(fused[#loc45, #loc46])
-#loc159 = loc(fused[#loc47, #loc48])
-#loc160 = loc(fused[#loc49, #loc50])
-#loc161 = loc(fused[#loc51, #loc50])
-#loc162 = loc(fused[#loc53, #loc50])
-#loc163 = loc(fused[#loc54, #loc55])
-#loc166 = loc(fused[#loc65, #loc66])
-#loc167 = loc(fused[#loc67, #loc68])
-#loc170 = loc(fused[#loc77, #loc78])
-#loc171 = loc(fused[#loc79, #loc80])
-#loc172 = loc(fused[#loc81, #loc82])
-#loc173 = loc(fused[#loc83, #loc82])
-#loc174 = loc(fused[#loc85, #loc82])
-#loc175 = loc(fused[#loc86, #loc87])
-#loc178 = loc(fused[#loc97, #loc98])
-#loc179 = loc(fused[#loc99, #loc100])
-#loc182 = loc(fused[#loc109, #loc110])
-#loc183 = loc(fused[#loc111, #loc112])
-#loc184 = loc(fused[#loc113, #loc114])
-#loc185 = loc(fused[#loc115, #loc114])
-#loc186 = loc(fused[#loc117, #loc114])
-#loc187 = loc(fused[#loc118, #loc119])
-#loc190 = loc(fused[#loc130, #loc131])
-#loc191 = loc(fused[#loc132, #loc133])
-#loc194 = loc(fused[#loc142, #loc143])

From 416199137834e287ad333adfb5181e5941f71826 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 16:28:44 +0000
Subject: [PATCH 06/20] Tighten CIR checks in masked cvtph2ps tests

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 43 ++++++++++++-------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index ee42f5de48d98..35fe714fea626 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,11 +8,17 @@
 #include <immintrin.h>
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
-    // CIR-LABEL: test_vcvtph2ps_mask
-    // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle({{.*}}, {{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
-    // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-    // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-    // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
+  // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
 
     // LLVM-LABEL: @test_vcvtph2ps_mask
     // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -36,11 +42,15 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
-  // CIR-LABEL: test_vcvtph2ps256_mask
-  // CIR: %[[VAL_5:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VAL_5]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: cir.select if {{.*}} then %[[FLOAT_EXT]] else {{.*}}
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
+  // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -62,12 +72,15 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 }
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
-  // CIR-LABEL: test_vcvtph2ps512_mask
-  // CIR: %[[BITCAST_I:.*]] = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[BITCAST_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK:.*]] = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.bool>
-  // CIR: cir.select if %[[MASK]] then %[[FLOAT_EXT]] else {{.*}}
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool>
+  // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>

From b91283786ee77431883b0c948a2ffa0bbb773c40 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 16:33:42 +0000
Subject: [PATCH 07/20] Adjust CIR-LABEL line for test_vcvtph2ps_mask

---
 clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 35fe714fea626..a376e792005c5 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,7 +8,7 @@
 #include <immintrin.h>
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask 
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>

From dc57ff39f224e634b02abcea2db7488725ee00cb Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 16:59:17 +0000
Subject: [PATCH 08/20] Update test

---
 .../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index a376e792005c5..680b37abb5436 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,6 +9,7 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask 
+  // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
@@ -43,6 +44,7 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
+  // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
@@ -73,6 +75,7 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
+  // CIR: cir.store {{.*}} : !u16i, !cir.ptr<!u16i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
@@ -102,7 +105,7 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 }
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm_maskz_cvtph_ps
+  // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
   // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
@@ -114,7 +117,7 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
   // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
 
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
+  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
   // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
@@ -139,14 +142,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm256_maskz_cvtph_ps
+  // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
   // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
 
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
   // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
 
 
@@ -169,14 +172,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
-  // CIR-LABEL: cir.func always_inline internal private dso_local @_mm512_maskz_cvtph_ps
+  // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
   // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
   // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
   // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
 
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
   // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz

From 1c6c877243dab12efaebcf56f4089ef55ee82683 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 17:45:51 +0000
Subject: [PATCH 09/20] Update test: expect cir.vec.ternary instead of cir.select

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 97 +++++++++----------
 1 file changed, 45 insertions(+), 52 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 680b37abb5436..0c8960f5a8431 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,50 +9,45 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask 
-  // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC_I]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
-  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
-  // CIR: cir.select if %[[FINAL_MASK]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
-
-    // LLVM-LABEL: @test_vcvtph2ps_mask
-    // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-    // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-    // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
-    // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
-    // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-    // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
-    // LLVM: ret <4 x float> {{.*}}
-
-    // OGCG-LABEL: @test_vcvtph2ps_mask
-    // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-    // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-    // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
-    // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
-    // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-    // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
-    // OGCG: ret <4 x float> {{.*}}
+  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+
+  // LLVM-LABEL: @test_vcvtph2ps_mask
+  // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+  // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+  // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+  // LLVM: ret <4 x float> {{.*}}
+  
+  // OGCG-LABEL: @test_vcvtph2ps_mask
+  // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
+  // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
+  // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+  // OGCG: ret <4 x float> {{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
-  // CIR: cir.store {{.*}} : !u8i, !cir.ptr<!u8i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
-  // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -75,15 +70,14 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
-  // CIR: cir.store {{.*}} : !u16i, !cir.ptr<!u16i>
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[BITCAST:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[FLOAT_EXT:.*]] = cir.cast floating %[[BITCAST]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.bool>
-  // CIR: cir.select if %[[BOOL_VEC]] then %[[FLOAT_EXT]] else %[[LOAD_SRC]]
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -106,16 +100,12 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
-  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[VEC:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[SHUFFLE:.*]] = cir.vec.shuffle(%[[VEC]], {{.*}} : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[SHUFFLE]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %[[CONV:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.bool>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] : !cir.vector<8 x !cir.bool>) {{.*}} : !cir.vector<4 x !cir.bool>
-  // CIR: cir.select if %[[FINAL_MASK]] then %[[CONV]] else %[[ZERO]]
+  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
   // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -143,11 +133,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
-  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
+  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
   // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
@@ -173,11 +166,11 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
-  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
   // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[CONV_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
   // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})

From ab226a73182af66ee130ef3477b583d38ddb5e54 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 19:00:41 +0000
Subject: [PATCH 10/20] Update test: check full CIR conversion chain in test functions

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 84 +++++++++++--------
 1 file changed, 47 insertions(+), 37 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 0c8960f5a8431..e1ce6475b66ff 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -8,14 +8,17 @@
 #include <immintrin.h>
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
-  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask 
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
   // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -41,13 +44,13 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -71,13 +74,13 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -99,13 +102,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 }
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func {{.*}} @_mm_maskz_cvtph_ps
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
   // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[BOOL_VEC]], %[[BOOL_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[CONV]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
   // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
@@ -132,15 +139,15 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
-  // CIR-LABEL: cir.func {{.*}} @_mm256_maskz_cvtph_ps
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
-  // CIR: %[[LOAD_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[VEC_I:.*]] = cir.cast bitcast %[[LOAD_VAL]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[BITCAST_H:.*]] = cir.cast bitcast %[[VEC_I]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[CONV:.*]] = cir.cast floating %[[BITCAST_H]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
   // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
@@ -165,12 +172,15 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
-  // CIR-LABEL: cir.func {{.*}} @_mm512_maskz_cvtph_ps
+  // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
+  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
   // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
-  // CIR: %[[MASK_VAL:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[CONV:.*]] = cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[BOOL_VEC:.*]] = cir.cast bitcast %[[MASK_VAL]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[BOOL_VEC]], %[[CONV]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
   // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})

From 47dfc767457b8c405b04e74cd80ab4e823d3cdfd Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Thu, 25 Dec 2025 19:38:21 +0000
Subject: [PATCH 11/20] Update test: match poison shuffle operand, call builtins directly

---
 .../CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index e1ce6475b66ff..6ec6e7447f72e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -13,7 +13,8 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+  // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
@@ -107,7 +108,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], {{.*}}) : !cir.vector<4 x !s16i>
+  // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
@@ -135,7 +137,8 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
   // OGCG: ret <4 x float> {{.*}}
 
-  return _mm_maskz_cvtph_ps(k, a);
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
@@ -168,7 +171,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
   // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
   // OGCG: ret <8 x float> {{.*}}
-   return _mm256_maskz_cvtph_ps(k, a);
+   typedef short __v8hi __attribute__((__vector_size__(16)));
+   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
@@ -200,5 +204,6 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
   // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
   // OGCG: ret <16 x float> {{.*}}
-  return _mm512_maskz_cvtph_ps(k, a);
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
 }

From be0c47638032ced5344ea7e36399ce64283e9fca Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Fri, 26 Dec 2025 03:58:51 +0000
Subject: [PATCH 12/20] Update CIR checks for cvtph2ps mask/maskz tests

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 80 ++++++++-----------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 6ec6e7447f72e..f5140502595d9 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -14,12 +14,12 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
+  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
   // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[LOAD_SRC]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -104,20 +104,17 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
-  // CIR: %[[ZERO:.*]] = cir.call @_mm_setzero_ps()
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] {{.*}}) : !cir.vector<4 x !s16i>
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] {{.*}}) : !cir.vector<4 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[FINAL_MASK]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
-
-  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps_maskz
-  // CIR: cir.call @_mm_maskz_cvtph_ps({{.*}}, {{.*}})
+  // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps()
+  // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
+  // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -143,18 +140,14 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[ZERO:.*]] = cir.call @_mm256_setzero_ps()
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
-
-  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps256_maskz
-  // CIR: cir.call @_mm256_maskz_cvtph_ps({{.*}}, {{.*}}) 
-
+  // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps()
+  // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -177,17 +170,14 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %[[ZERO:.*]] = cir.call @_mm512_setzero_ps()
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[FLOAT_VEC]], %[[ZERO]]) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
-
-  // CIR-LABEL: cir.func {{.*}} @test_vcvtph2ps512_maskz
-  // CIR: cir.call @_mm512_maskz_cvtph_ps({{.*}}, {{.*}})
+  // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps()
+  // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
   // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>

From 168d3a1d2ed4076da58c363e2130789f3472661d Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Fri, 26 Dec 2025 05:06:39 +0000
Subject: [PATCH 13/20] Rename mask captures and drop literal spaces in select checks

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index f5140502595d9..52e0e1c4298fe 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -17,9 +17,9 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_VEC]], %[[MASK_VEC]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[FINAL_MASK]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
   // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -50,8 +50,8 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -80,8 +80,8 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
   // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[MASK_VEC]] {{.*}} %[[FLOAT_VEC]] {{.*}} %[[LOAD_SRC]]
+  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
   // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
@@ -112,9 +112,9 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
   // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[Z_MASK_V:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_V]], %[[Z_MASK_V]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z_FIN_MASK]] {{.*}} %[[Z_FLOAT]] {{.*}} %[[Z_ZERO]]
+  // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -146,8 +146,8 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[Z256_MASK_V:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z256_MASK_V]] {{.*}} %[[Z256_FLOAT]] {{.*}} %[[Z256_ZERO]]
+  // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
   // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
@@ -176,8 +176,8 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
   // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[Z512_MASK_V:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}} {{.*}} %[[Z512_MASK_V]] {{.*}} %[[Z512_FLOAT]] {{.*}} %[[Z512_ZERO]]
+  // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]]
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
   // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>

From f63bfe3376c7c7520b1ea4abfb26805d03f02ae8 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Fri, 26 Dec 2025 06:11:31 +0000
Subject: [PATCH 14/20] Replace named FileCheck captures with wildcards in cvtph2ps tests

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 257 +++++++++---------
 1 file changed, 129 insertions(+), 128 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 52e0e1c4298fe..1672e51a0f40e 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,190 +9,191 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: %[[NARROW_A:.*]] = cir.vec.shuffle(%[[CAST_A]], %[[POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %[[FINAL_MASK:.*]] = cir.vec.shuffle(%[[MASK_BIT]], %[[MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[FINAL_MASK]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+  // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
-  // LLVM: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
-  // LLVM: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
-  // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
+  // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: bitcast <4 x i16> {{.*}} to <4 x half>
+  // LLVM: fpext <4 x half> {{.*}} to <4 x float>
+  // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer
+  // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   // LLVM: ret <4 x float> {{.*}}
   
   // OGCG-LABEL: @test_vcvtph2ps_mask
-  // OGCG: %[[VEC_128:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: %[[NARROWED:.*]] = shufflevector <8 x i16> %[[VEC_128]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %[[HALF_VEC:.*]] = bitcast <4 x i16> %[[NARROWED]] to <4 x half>
-  // OGCG: %[[FLOAT_VEC:.*]] = fpext <4 x half> %[[HALF_VEC]] to <4 x float>
-  // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[FLOAT_VEC]], <4 x float> {{.*}}
-  // OGCG: ret <4 x float> {{.*}}
+  // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: fpext <4 x half> {{.*}} to <4 x float>
+  // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer
+  // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+  // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
-  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
-  // LLVM: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
-  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
-  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
-  // LLVM: ret <8 x float> {{.*}}
+  // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: bitcast <8 x i16> {{.*}} to <8 x half>
+  // LLVM: fpext <8 x half> {{.*}} to <8 x float>
+  // LLVM: bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer
+  // LLVM: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_mask
-  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
-  // OGCG: %[[FPEXT:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
-  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
-  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[FPEXT]], <8 x float> {{.*}}
-  // OGCG: ret <8 x float> {{.*}}
+  // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: bitcast <8 x i16> {{.*}} to <8 x half>
+  // OGCG: fpext <8 x half> {{.*}} to <8 x float>
+  // OGCG: bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer
+  // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
 }
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
-  // CIR: %[[LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %[[CAST_A:.*]] = cir.cast bitcast %[[LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %[[LOAD_SRC:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
-  // CIR: %[[LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[F16_VEC:.*]] = cir.cast bitcast %[[CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[FLOAT_VEC:.*]] = cir.cast floating %[[F16_VEC]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[MASK_BIT:.*]] = cir.cast bitcast %[[LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[MASK_BIT]]{{.*}}%[[FLOAT_VEC]]{{.*}}%[[LOAD_SRC]]
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+  // CIR: cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
-  // LLVM: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
-  // LLVM: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
-  // LLVM: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
-  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
-  // LLVM: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
-  // LLVM: ret <16 x float> {{.*}}
+  // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: bitcast <16 x i16> {{.*}} to <16 x half>
+  // LLVM: fpext <16 x half> {{.*}} to <16 x float>
+  // LLVM: bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer
+  // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_mask
-  // OGCG: %[[BITCAST_I:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
-  // OGCG: %[[BITCAST_H:.*]] = bitcast <16 x i16> %[[BITCAST_I]] to <16 x half>
-  // OGCG: %[[FPEXT:.*]] = fpext <16 x half> %[[BITCAST_H]] to <16 x float>
-  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
-  // OGCG: %[[RESULT:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[FPEXT]], <16 x float> {{.*}}
-  // OGCG: ret <16 x float> {{.*}}
+  // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: bitcast <16 x i16> {{.*}} to <16 x half>
+  // OGCG: fpext <16 x half> {{.*}} to <16 x float>
+  // OGCG: bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer
+  // OGCG: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
 }
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
-  // CIR: %[[Z_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[Z_CAST_A:.*]] = cir.cast bitcast %[[Z_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[Z_ZERO:.*]] = cir.call @_mm_setzero_ps()
-  // CIR: %[[Z_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[Z_POISON:.*]] = cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: %[[Z_NARROW_A:.*]] = cir.vec.shuffle(%[[Z_CAST_A]], %[[Z_POISON]] : !cir.vector<8 x !s16i>) {{.*}} : !cir.vector<4 x !s16i>
-  // CIR: %[[Z_F16:.*]] = cir.cast bitcast %[[Z_NARROW_A]] : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %[[Z_FLOAT:.*]] = cir.cast floating %[[Z_F16]] : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %[[Z_MASK_BIT:.*]] = cir.cast bitcast %[[Z_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %[[Z_FIN_MASK:.*]] = cir.vec.shuffle(%[[Z_MASK_BIT]], %[[Z_MASK_BIT]] : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z_FIN_MASK]]{{.*}}%[[Z_FLOAT]]{{.*}}%[[Z_ZERO]]
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
-  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
-  // LLVM: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
-  // LLVM: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+  // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+  // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
   // LLVM: ret <4 x float> {{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_maskz
-  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: %[[NARROW:.*]] = shufflevector <8 x i16> %[[BITCAST_I]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %[[BITCAST_H:.*]] = bitcast <4 x i16> %[[NARROW]] to <4 x half>
-  // OGCG: %[[CONV:.*]] = fpext <4 x half> %[[BITCAST_H]] to <4 x float>
-  // OGCG: %[[MASK:.*]] = shufflevector <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %[[RESULT:.*]] = select <4 x i1> %[[MASK]], <4 x float> %[[CONV]], <4 x float> {{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+  // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+  // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
   // OGCG: ret <4 x float> {{.*}}
-
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
-  // CIR: %[[Z256_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %[[Z256_CAST_A:.*]] = cir.cast bitcast %[[Z256_LOAD_A]] : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %[[Z256_ZERO:.*]] = cir.call @_mm256_setzero_ps()
-  // CIR: %[[Z256_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %[[Z256_F16:.*]] = cir.cast bitcast %[[Z256_CAST_A]] : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %[[Z256_FLOAT:.*]] = cir.cast floating %[[Z256_F16]] : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %[[Z256_MASK_BIT:.*]] = cir.cast bitcast %[[Z256_LOAD_K]] : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z256_MASK_BIT]]{{.*}}%[[Z256_FLOAT]]{{.*}}%[[Z256_ZERO]]
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
-  // LLVM: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
-  // LLVM: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
-  // LLVM: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
-  // LLVM: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
-  // LLVM: ret <8 x float> {{.*}} 
+  // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+  // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+  // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
+  // LLVM: ret <8 x float> {{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_maskz
-  // OGCG: %[[BITCAST_I:.*]] = bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: %[[BITCAST_H:.*]] = bitcast <8 x i16> %[[BITCAST_I]] to <8 x half>
-  // OGCG: %[[CONV:.*]] = fpext <8 x half> %[[BITCAST_H]] to <8 x float>
-  // OGCG: %[[MASK:.*]] = bitcast i8 {{.*}} to <8 x i1>
-  // OGCG: %[[RESULT:.*]] = select <8 x i1> %[[MASK]], <8 x float> %[[CONV]], <8 x float> {{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+  // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+  // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
   // OGCG: ret <8 x float> {{.*}}
-   typedef short __v8hi __attribute__((__vector_size__(16)));
-   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
+  typedef short __v8hi __attribute__((__vector_size__(16)));
+  return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
-  // CIR: %[[Z512_LOAD_A:.*]] = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %[[Z512_CAST_A:.*]] = cir.cast bitcast %[[Z512_LOAD_A]] : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %[[Z512_ZERO:.*]] = cir.call @_mm512_setzero_ps()
-  // CIR: %[[Z512_LOAD_K:.*]] = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %[[Z512_F16:.*]] = cir.cast bitcast %[[Z512_CAST_A]] : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %[[Z512_FLOAT:.*]] = cir.cast floating %[[Z512_F16]] : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %[[Z512_MASK_BIT:.*]] = cir.cast bitcast %[[Z512_LOAD_K]] : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%[[Z512_MASK_BIT]]{{.*}}%[[Z512_FLOAT]]{{.*}}%[[Z512_ZERO]]
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
-  // LLVM: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
-  // LLVM: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
-  // LLVM: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
-  // LLVM: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
-  // LLVM: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+  // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+  // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
   // LLVM: ret <16 x float> {{.*}}
-  
+
   // OGCG-LABEL: @test_vcvtph2ps512_maskz
-  // OGCG: %[[BI:.*]] = bitcast <4 x i64> {{.*}} to <16 x i16>
-  // OGCG: %[[BH:.*]] = bitcast <16 x i16> %[[BI]] to <16 x half>
-  // OGCG: %[[CONV:.*]] = fpext <16 x half> %[[BH]] to <16 x float>
-  // OGCG: %[[MASK:.*]] = bitcast i16 {{.*}} to <16 x i1>
-  // OGCG: %[[RES:.*]] = select <16 x i1> %[[MASK]], <16 x float> %[[CONV]], <16 x float> {{.*}}
+  // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+  // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+  // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
   // OGCG: ret <16 x float> {{.*}}
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);

From 964139c73bdc2f966ffb9ad5197e71b4e27f4450 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Fri, 26 Dec 2025 07:28:29 +0000
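Subject: [PATCH 15/20] Update test

Rewrite the FileCheck patterns in avx512f16c-builtins.c for the masked
and zero-masked vcvtph2ps tests:

- Match cir.vec.shuffle with its printed index list
  ([#cir.int<0> : !s32i, ...]) instead of `{indices = [0, 1, 2, 3]}`
  or a bare wildcard.
- Match `cir.select if ... then ... else ...` together with its operand
  and result types, replacing the looser `select if|vec.ternary`
  alternation.
- Add LLVM/OGCG checks for the operand loads, and drop the
  `icmp ne <N x i1> ..., zeroinitializer` checks from the mask tests.
- Drop the trailing OGCG `ret` checks from the maskz tests.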

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 224 ++++++++++--------
 1 file changed, 127 insertions(+), 97 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 1672e51a0f40e..f26e0d5d5ccae 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,95 +9,113 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
-  // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !s16i>
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: cir.cast floating {{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.vec.shuffle({{.*}}) {{.*}} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
-  // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: bitcast <4 x i16> {{.*}} to <4 x half>
-  // LLVM: fpext <4 x half> {{.*}} to <4 x float>
-  // LLVM: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: icmp ne <4 x i1> {{.*}}, zeroinitializer
-  // LLVM: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
-  // LLVM: ret <4 x float> {{.*}}
-  
+  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}}
+  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+  // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+  // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  // LLVM: ret <4 x float> %{{.*}}
+
   // OGCG-LABEL: @test_vcvtph2ps_mask
-  // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: shufflevector <8 x i16> {{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: fpext <4 x half> {{.*}} to <4 x float>
-  // OGCG: shufflevector <8 x i1> {{.*}}, <8 x i1> {{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: icmp ne <4 x i1> {{.*}}, zeroinitializer
-  // OGCG: select <4 x i1> {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}}
+  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}}
+  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+  // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
+  // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
-  // CIR: cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: cir.cast floating {{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
-  // LLVM: bitcast <2 x i64> {{.*}} to <8 x i16>
-  // LLVM: bitcast <8 x i16> {{.*}} to <8 x half>
-  // LLVM: fpext <8 x half> {{.*}} to <8 x float>
-  // LLVM: bitcast i8 {{.*}} to <8 x i1>
-  // LLVM: icmp ne <8 x i1> {{.*}}, zeroinitializer
-  // LLVM: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
+  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+  // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_mask
-  // OGCG: bitcast <2 x i64> {{.*}} to <8 x i16>
-  // OGCG: bitcast <8 x i16> {{.*}} to <8 x half>
-  // OGCG: fpext <8 x half> {{.*}} to <8 x float>
-  // OGCG: bitcast i8 {{.*}} to <8 x i1>
-  // OGCG: icmp ne <8 x i1> {{.*}}, zeroinitializer
-  // OGCG: select <8 x i1> {{.*}}, <8 x float> {{.*}}, <8 x float> {{.*}}
+  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
+  // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
+  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
 }
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
-  // CIR: cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: cir.cast floating {{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
-  // LLVM: bitcast <4 x i64> {{.*}} to <16 x i16>
-  // LLVM: bitcast <16 x i16> {{.*}} to <16 x half>
-  // LLVM: fpext <16 x half> {{.*}} to <16 x float>
-  // LLVM: bitcast i16 {{.*}} to <16 x i1>
-  // LLVM: icmp ne <16 x i1> {{.*}}, zeroinitializer
-  // LLVM: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
+  // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+  // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+  // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+  // LLVM: %{{.*}} = load i16, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_mask
-  // OGCG: bitcast <4 x i64> {{.*}} to <16 x i16>
-  // OGCG: bitcast <16 x i16> {{.*}} to <16 x half>
-  // OGCG: fpext <16 x half> {{.*}} to <16 x float>
-  // OGCG: bitcast i16 {{.*}} to <16 x i1>
-  // OGCG: icmp ne <16 x i1> {{.*}}, zeroinitializer
-  // OGCG: select <16 x i1> {{.*}}, <16 x float> {{.*}}, <16 x float> {{.*}}
+  // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
+  // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
+  // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
+  // OGCG: %{{.*}} = load i16, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
 }
@@ -109,32 +127,36 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !s16i>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) {indices = [0, 1, 2, 3]} : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
-  // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
   // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
-  // LLVM: ret <4 x float> {{.*}}
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  // LLVM: ret <4 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_maskz
-  // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
   // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> {{.*}}
-  // OGCG: ret <4 x float> {{.*}}
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
 }
@@ -148,23 +170,27 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
-  // LLVM: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
   // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // LLVM: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
-  // LLVM: ret <8 x float> {{.*}}
+  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // LLVM: ret <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_maskz
-  // OGCG: %{{.*}} = bitcast <2 x i64> {{.*}} to <8 x i16>
+  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
   // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // OGCG: %{{.*}} = bitcast i8 {{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> {{.*}}
-  // OGCG: ret <8 x float> {{.*}}
+  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
 }
@@ -178,23 +204,27 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: cir.{{(select if|vec.ternary)}}{{.*}}%{{.*}}, %{{.*}}, %{{.*}}
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
-  // LLVM: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
   // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // LLVM: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
-  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
-  // LLVM: ret <16 x float> {{.*}}
+  // LLVM: %{{.*}} = load i16, ptr %{{.*}}
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  // LLVM: ret <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_maskz
-  // OGCG: %{{.*}} = bitcast <4 x i64> {{.*}} to <16 x i16>
+  // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
   // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // OGCG: %{{.*}} = bitcast i16 {{.*}} to <16 x i1>
-  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}}
-  // OGCG: ret <16 x float> {{.*}}
+  // OGCG: %{{.*}} = load i16, ptr %{{.*}}
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
 }

From 9a999f526c9fdc21ebcb2ee7c1718b49096a48cc Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Sat, 27 Dec 2025 14:35:47 +0000
Subject: [PATCH 16/20] Update test

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 93 ++-----------------
 1 file changed, 9 insertions(+), 84 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index f26e0d5d5ccae..8ce29b57de275 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,36 +9,25 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !cir.float>>, !cir.vector<4 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
-  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // LLVM: %{{.*}} = load <4 x float>, ptr %{{.*}}
-  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
   // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
-  // LLVM: ret <4 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_mask
-  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // OGCG: %{{.*}} = load <4 x float>, ptr %{{.*}}
-  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
   // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
@@ -52,32 +41,20 @@ __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<8 x !cir.float>>, !cir.vector<8 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
-  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // LLVM: %{{.*}} = load <8 x float>, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_mask
-  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // OGCG: %{{.*}} = load <8 x float>, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
@@ -87,32 +64,20 @@ __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<16 x !cir.float>>, !cir.vector<16 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
-  // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
-  // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
-  // LLVM: %{{.*}} = load <16 x float>, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // LLVM: %{{.*}} = load i16, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_mask
-  // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
-  // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
-  // OGCG: %{{.*}} = load <16 x float>, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // OGCG: %{{.*}} = load i16, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 
@@ -122,35 +87,20 @@ __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %{{.*}} = cir.call @_mm_setzero_ps() : () -> !cir.vector<4 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
-  // CIR: %{{.*}} = cir.const #cir.poison : !cir.vector<8 x !s16i>
+  // CIR: %{{.*}} = cir.call @_mm_setzero_ps()
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
-  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
-  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
-  // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
-  // LLVM: ret <4 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_maskz
-  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
-  // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
   // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
@@ -163,31 +113,18 @@ __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<2 x !s64i>>, !cir.vector<2 x !s64i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
-  // CIR: %{{.*}} = cir.call @_mm256_setzero_ps() : () -> !cir.vector<8 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u8i>, !u8i
+  // CIR: %{{.*}} = cir.call @_mm256_setzero_ps()
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<8 x !cir.float>, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
-  // LLVM: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // LLVM: %{{.*}} = load i8, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
-  // LLVM: ret <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_maskz
-  // OGCG: %{{.*}} = load <2 x i64>, ptr %{{.*}}
-  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // OGCG: %{{.*}} = load i8, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
@@ -197,31 +134,19 @@ __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s64i> -> !cir.vector<16 x !s16i>
-  // CIR: %{{.*}} = cir.call @_mm512_setzero_ps() : () -> !cir.vector<16 x !cir.float>
-  // CIR: %{{.*}} = cir.load {{.*}} : !cir.ptr<!u16i>, !u16i
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
-  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.call @_mm512_setzero_ps()
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}} : (!cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>, !cir.vector<16 x !cir.float>, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
-  // LLVM: %{{.*}} = load <4 x i64>, ptr %{{.*}}
-  // LLVM: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
   // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // LLVM: %{{.*}} = load i16, ptr %{{.*}}
   // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-  // LLVM: ret <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_maskz
-  // OGCG: %{{.*}} = load <4 x i64>, ptr %{{.*}}
-  // OGCG: %{{.*}} = bitcast <4 x i64> %{{.*}} to <16 x i16>
   // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // OGCG: %{{.*}} = load i16, ptr %{{.*}}
   // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 

From 55240b56b3bca7336076d5dad83ed8156c0b245a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Sat, 27 Dec 2025 15:02:52 +0000
Subject: [PATCH 17/20] Update test: expect cir.vec.ternary and drop redundant bitcast/shuffle CHECK lines

---
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 93 +++++--------------
 1 file changed, 23 insertions(+), 70 deletions(-)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 8ce29b57de275..381e5d32af770 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -9,147 +9,100 @@
 
 __m128 test_vcvtph2ps_mask(__m128i a, __m128 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_mask
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i>
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps_mask
-  // LLVM: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // LLVM: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_mask
-  // OGCG: %{{.*}} = bitcast <2 x i64> %{{.*}} to <8 x i16>
-  // OGCG: %{{.*}} = shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, src, k);
 }
 
 __m256 test_vcvtph2ps256_mask(__m128i a, __m256 src, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_mask
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_mask
-  // LLVM: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_mask
-  // OGCG: %{{.*}} = bitcast <8 x i16> %{{.*}} to <8 x half>
   // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, src, k);
 }
 
 __m512 test_vcvtph2ps512_mask(__m256i a, __m512 src, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_mask
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s16i> -> !cir.vector<16 x !cir.f16>
   // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_mask
-  // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
-  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_mask
-  // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
-  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, src, k, 4);
 }
 
 __m128 test_vcvtph2ps_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps_maskz
-  // CIR: %{{.*}} = cir.call @_mm_setzero_ps()
   // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !s16i>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<4 x !cir.f16> -> !cir.vector<4 x !cir.float>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps_maskz
-  // LLVM: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // LLVM: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // LLVM: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps_maskz
-  // OGCG: %{{.*}} = bitcast <4 x i16> %{{.*}} to <4 x half>
   // OGCG: %{{.*}} = fpext <4 x half> %{{.*}} to <4 x float>
-  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  // OGCG: %{{.*}} = select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <4 x i1> {{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps_mask((__v8hi)a, _mm_setzero_ps(), k);
 }
 
 __m256 test_vcvtph2ps256_maskz(__m128i a, __mmask8 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps256_maskz
-  // CIR: %{{.*}} = cir.call @_mm256_setzero_ps()
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s16i> -> !cir.vector<8 x !cir.f16>
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<8 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<8 x !cir.f16> -> !cir.vector<8 x !cir.float>
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps256_maskz
   // LLVM: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // LLVM: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps256_maskz
   // OGCG: %{{.*}} = fpext <8 x half> %{{.*}} to <8 x float>
-  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
-  // OGCG: %{{.*}} = select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <8 x i1> {{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   typedef short __v8hi __attribute__((__vector_size__(16)));
   return __builtin_ia32_vcvtph2ps256_mask((__v8hi)a, _mm256_setzero_ps(), k);
 }
 
 __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   // CIR-LABEL: cir.func no_inline dso_local @test_vcvtph2ps512_maskz
-  // CIR: %{{.*}} = cir.call @_mm512_setzero_ps()
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> !cir.vector<16 x !cir.{{(bool|int<s, 1>)}}>
-  // CIR: %{{.*}} = cir.select if %{{.*}} then %{{.*}} else %{{.*}}
+  // CIR: %{{.*}} = cir.cast floating %{{.*}} : !cir.vector<16 x !cir.f16> -> !cir.vector<16 x !cir.float>
+  // CIR: %{{.*}} = cir.vec.ternary(%{{.*}}, %{{.*}}, %{{.*}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
 
   // LLVM-LABEL: @test_vcvtph2ps512_maskz
-  // LLVM: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // LLVM: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
-  // LLVM: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  // LLVM: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
 
   // OGCG-LABEL: @test_vcvtph2ps512_maskz
-  // OGCG: %{{.*}} = bitcast <16 x i16> %{{.*}} to <16 x half>
   // OGCG: %{{.*}} = fpext <16 x half> %{{.*}} to <16 x float>
-  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
-  // OGCG: %{{.*}} = select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
-
+  // OGCG: %{{.*}} = select <16 x i1> {{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
 }

From 1a3a6b85617456718bab83f51d0c5f5f827c24f9 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Tue, 6 Jan 2026 17:10:09 +0000
Subject: [PATCH 18/20] emit full intrinsic name and update test

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 32 +++++++++++----
 .../CodeGenBuiltins/X86/avx512f16c-builtins.c | 39 +++++++++++++++++++
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index a0ad2727c78a3..ae6a39bfd0f13 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -449,17 +449,33 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
 // Convert F16 halfs to floats.
 static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                             mlir::Location loc,
-                                            const StringRef str,
                                             llvm::ArrayRef<mlir::Value> ops,
-                                            mlir::Type dstTy) {
+                                            mlir::Type dstTy,
+                                            unsigned builtinID) {
   assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
          "Unknown cvtph2ps intrinsic");
 
   // If the SAE intrinsic doesn't use default rounding then we can't upgrade.
-  if (ops.size() == 4 &&
-      ops[3].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue() !=
-          4) {
-    return emitIntrinsicCallOp(builder, loc, str, dstTy, ops);
+  if (ops.size() == 4) {
+    auto constOp = ops[3].getDefiningOp<cir::ConstantOp>();
+    assert(constOp && "Expected constant operand");
+    if (constOp.getIntValue().getZExtValue() != 4) {
+      StringRef intrinsicName;
+      switch (builtinID) {
+      default:
+        llvm_unreachable("Unexpected builtin");
+      case X86::BI__builtin_ia32_vcvtph2ps_mask:
+        intrinsicName = "x86.avx512.mask.vcvtph2ps.128";
+        break;
+      case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+        intrinsicName = "x86.avx512.mask.vcvtph2ps.256";
+        break;
+      case X86::BI__builtin_ia32_vcvtph2ps512_mask:
+        intrinsicName = "x86.avx512.mask.vcvtph2ps.512";
+        break;
+      }
+      return emitIntrinsicCallOp(builder, loc, intrinsicName, dstTy, ops);
+    }
   }
 
   unsigned numElts = cast<cir::VectorType>(dstTy).getSize();
@@ -1875,8 +1891,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_vcvtph2ps256_mask:
   case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
     mlir::Location loc = getLoc(expr->getExprLoc());
-    return emitX86CvtF16ToFloatExpr(builder, loc, "cvtph2ps", ops,
-                                    convertType(expr->getType()));
+    return emitX86CvtF16ToFloatExpr(builder, loc, ops,
+                                    convertType(expr->getType()), builtinID);
   }
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
index 381e5d32af770..900a1b80a0f4d 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f16c-builtins.c
@@ -106,3 +106,42 @@ __m512 test_vcvtph2ps512_maskz(__m256i a, __mmask16 k) {
   typedef short __v16hi __attribute__((__vector_size__(32)));
   return __builtin_ia32_vcvtph2ps512_mask((__v16hi)a, _mm512_setzero_ps(), k, 4);
 }
+
+__m512 test_mm512_cvt_roundph_ps(__m256i a) {
+  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_cvt_roundph_ps
+  // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: @test_mm512_cvt_roundph_ps
+  // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 -1, i32 8)
+
+  // OGCG-LABEL: @test_mm512_cvt_roundph_ps
+  // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> zeroinitializer, i16 -1, i32 8)
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return _mm512_cvt_roundph_ps((__v16hi)a, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_mask_cvt_roundph_ps(__m512 w, __mmask16 u, __m256i a) {
+  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_mask_cvt_roundph_ps
+  // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: @test_mm512_mask_cvt_roundph_ps
+  // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8)
+
+  // OGCG-LABEL: @test_mm512_mask_cvt_roundph_ps
+  // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8)
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return _mm512_mask_cvt_roundph_ps(w, u, (__v16hi)a, _MM_FROUND_NO_EXC);
+}
+
+__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 u, __m256i a) {
+  // CIR-LABEL: cir.func no_inline dso_local @test_mm512_maskz_cvt_roundph_ps
+  // CIR: %{{.*}} = cir.call_llvm_intrinsic "x86.avx512.mask.vcvtph2ps.512" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : (!cir.vector<16 x !s16i>, !cir.vector<16 x !cir.float>, !u16i, !s32i) -> !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: @test_mm512_maskz_cvt_roundph_ps
+  // LLVM: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8)
+
+  // OGCG-LABEL: @test_mm512_maskz_cvt_roundph_ps
+  // OGCG: call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %{{.*}}, <16 x float> %{{.*}}, i16 %{{.*}}, i32 8)
+  typedef short __v16hi __attribute__((__vector_size__(32)));
+  return _mm512_maskz_cvt_roundph_ps(u, (__v16hi)a, _MM_FROUND_NO_EXC);
+}

From f02e93d8c143fb664f5125c0863efb2c49919835 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Tue, 6 Jan 2026 18:01:26 +0000
Subject: [PATCH 19/20] Update comments

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ae6a39bfd0f13..12766d5035305 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -446,7 +446,7 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
   return builder.createMul(loc, lhs, rhs);
 }
 
-// Convert F16 halfs to floats.
+// Convert f16 half values to floats.
 static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                             mlir::Location loc,
                                             llvm::ArrayRef<mlir::Value> ops,

From b0787e6320acdb5302e7249c9daab72fb7590f82 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu@gmail.com>
Date: Tue, 6 Jan 2026 19:00:37 +0000
Subject: [PATCH 20/20] Update CIRGenBuiltinX86.cpp: drop builtinID parameter and hard-code the 512-bit vcvtph2ps intrinsic name

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 12766d5035305..29a89e46bafba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -450,8 +450,7 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
 static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
                                             mlir::Location loc,
                                             llvm::ArrayRef<mlir::Value> ops,
-                                            mlir::Type dstTy,
-                                            unsigned builtinID) {
+                                            mlir::Type dstTy) {
   assert((ops.size() == 1 || ops.size() == 3 || ops.size() == 4) &&
          "Unknown cvtph2ps intrinsic");
 
@@ -460,21 +459,8 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
     auto constOp = ops[3].getDefiningOp<cir::ConstantOp>();
     assert(constOp && "Expected constant operand");
     if (constOp.getIntValue().getZExtValue() != 4) {
-      StringRef intrinsicName;
-      switch (builtinID) {
-      default:
-        llvm_unreachable("Unexpected builtin");
-      case X86::BI__builtin_ia32_vcvtph2ps_mask:
-        intrinsicName = "x86.avx512.mask.vcvtph2ps.128";
-        break;
-      case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-        intrinsicName = "x86.avx512.mask.vcvtph2ps.256";
-        break;
-      case X86::BI__builtin_ia32_vcvtph2ps512_mask:
-        intrinsicName = "x86.avx512.mask.vcvtph2ps.512";
-        break;
-      }
-      return emitIntrinsicCallOp(builder, loc, intrinsicName, dstTy, ops);
+      return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
+                                 dstTy, ops);
     }
   }
 
@@ -1892,7 +1878,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
     mlir::Location loc = getLoc(expr->getExprLoc());
     return emitX86CvtF16ToFloatExpr(builder, loc, ops,
-                                    convertType(expr->getType()), builtinID);
+                                    convertType(expr->getType()));
   }
   case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
   case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: