[CIR][X86] Implement handling for convert-half builtins #173143
[CIR][X86] Implement handling for convert-half builtins #173143 — andykaylor merged 20 commits into llvm:main from
Conversation
|
@llvm/pr-subscribers-clangir @llvm/pr-subscribers-clang Author: Priyanshu Kumar (Priyanshu3820) Changes — Related to: #167765. Full diff: https://github.com/llvm/llvm-project/pull/173143.diff 2 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..59d467da3a9fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -362,6 +362,27 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ mlir::Type dstTy,
+ SmallVectorImpl<mlir::Value> &ops) {
+
+ mlir::Value src = ops[0];
+ mlir::Value passthru = ops[1];
+
+ auto vecTy = mlir::cast<cir::VectorType>(src.getType());
+ uint64_t numElems = vecTy.getSize();
+
+ mlir::Value mask = getMaskVecValue(builder, loc, ops[2], numElems);
+
+ auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
+ mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
+
+ mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
+
+ return emitX86Select(builder, loc, mask, res, passthru);
+}
+
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::SmallVector<mlir::Value> ops,
bool isSigned) {
@@ -1662,12 +1683,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpordsd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
+ ops);
+ }
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ mlir::Value intrinsicMask = getMaskVecValue(builder, loc, ops[2], 4);
+ return emitIntrinsicCallOp(builder, loc,
+ "x86.avx512bf16.mask.cvtneps2bf16.128",
+ convertType(expr->getType()),
+ mlir::ValueRange{ops[0], ops[1], intrinsicMask});
+ }
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned numElts = cast<cir::VectorType>(ops[1].getType()).getSize();
+ mlir::Value selectMask = getMaskVecValue(builder, loc, ops[2], numElts);
+ StringRef intrinsicName;
+ if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask)
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+ else
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
+ mlir::Value intrinsicResult =
+ emitIntrinsicCallOp(builder, loc, intrinsicName, ops[1].getType(),
+ mlir::ValueRange{ops[0]});
+ return emitX86Select(builder, loc, selectMask, intrinsicResult, ops[1]);
+ }
case X86::BI__cpuid:
case X86::BI__cpuidex:
case X86::BI__emul:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
new file mode 100644
index 0000000000000..ccfc0d4a6a813
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bf16 -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+__m256bh test_mm512_mask_cvtneps_pbh(__m256bh src, __mmask16 k, __m512 a) {
+ // CIR-LABEL: @test_mm512_mask_cvtneps_pbh
+ // CIR: cir.call @_mm512_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<16 x !cir.bf16>, !u16i, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16>
+
+ // LLVM-LABEL: @test_mm512_mask_cvtneps_pbh
+ // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+
+ // OGCG-LABEL: @test_mm512_mask_cvtneps_pbh
+ // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+ return _mm512_mask_cvtneps_pbh(src, k, a);
+}
+
+__m256bh test_mm512_maskz_cvtneps_pbh(__mmask16 k, __m512 a) {
+ // CIR-LABEL: @test_mm512_maskz_cvtneps_pbh
+ // CIR: cir.call @_mm512_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u16i, !cir.vector<16 x !cir.float>) -> !cir.vector<16 x !cir.bf16>
+
+ // LLVM-LABEL: @test_mm512_maskz_cvtneps_pbh
+ // LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> {{.+}})
+
+ // OGCG-LABEL: @test_mm512_maskz_cvtneps_pbh
+ // OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> {{.+}})
+ return _mm512_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm256_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m256 a) {
+ // CIR-LABEL: test_mm256_mask_cvtneps_pbh
+ // CIR: cir.call @_mm256_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+
+ // LLVM-LABEL: test_mm256_mask_cvtneps_pbh
+ // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}})
+
+ // OGCG-LABEL: test_mm256_mask_cvtneps_pbh
+ // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}})
+ return _mm256_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm256_maskz_cvtneps_pbh(__mmask8 k, __m256 a) {
+ // CIR-LABEL: test_mm256_maskz_cvtneps_pbh
+ // CIR: cir.call @_mm256_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, !cir.vector<8 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+
+ // LLVM-LABEL: test_mm256_maskz_cvtneps_pbh
+ // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}})
+
+ // OGCG-LABEL: test_mm256_maskz_cvtneps_pbh
+ // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> {{.+}})
+ return _mm256_maskz_cvtneps_pbh(k, a);
+}
+
+__m128bh test_mm_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m128 a) {
+ // CIR-LABEL: test_mm_mask_cvtneps_pbh
+ // CIR: cir.call @_mm_mask_cvtneps_pbh({{.+}}, {{.+}}, {{.+}}) : (!cir.vector<8 x !cir.bf16>, !u8i, !cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf1{{.+}}
+
+ // LLVM-LABEL: test_mm_mask_cvtneps_pbh
+ // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+ // OGCG-LABEL: test_mm_mask_cvtneps_pbh
+ // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+ return _mm_mask_cvtneps_pbh(src, k, a);
+}
+
+__m128bh test_mm_maskz_cvtneps_pbh(__mmask8 k, __m128 a) {
+ // CIR-LABEL: test_mm_maskz_cvtneps_pbh
+ // CIR: cir.call @_mm_maskz_cvtneps_pbh({{.+}}, {{.+}}) : (!u8i, !cir.vector<4 x !cir.float>) -> !cir.vector<8 x !cir.bf16>
+
+ // LLVM-LABEL: test_mm_maskz_cvtneps_pbh
+ // LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+
+ // OGCG-LABEL: test_mm_maskz_cvtneps_pbh
+ // OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> {{.+}}, <8 x bfloat> {{.+}}, <4 x i1> %extract.i)
+ return _mm_maskz_cvtneps_pbh(k, a);
+}
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
hi @andykaylor, @bcardosolopes, I have decided to handle the fp16 builtins in a separate PR (I was having some issues with them, so I thought I'd need some more time for debugging). This PR only contains the bf16 builtins. |
The f16 half-to-float builtins are now implemented in this PR: #173572
andykaylor
left a comment
There was a problem hiding this comment.
This looks good, with one minor suggestion.
Co-authored-by: Andy Kaylor <akaylor@nvidia.com>
There was a problem hiding this comment.
Pull request overview
This PR implements handling for X86 convert-half builtins in the ClangIR code generation framework, specifically targeting AVX512 BF16 intrinsics for converting single-precision floating-point values to bfloat16 format.
- Adds implementation for the
cvtneps2bf16 builtins with mask support (128, 256, and 512-bit variants) - Includes comprehensive test coverage with CIR, LLVM, and original CodeGen verification
- Moves previously unimplemented builtins from NYI (Not Yet Implemented) to functional implementations
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated no comments.
| File | Description |
|---|---|
| clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c | New test file covering mask and maskz variants of cvtneps_pbh functions for 128, 256, and 512-bit vectors with CIR, LLVM, and OGCG verification |
| clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | Implements handlers for __builtin_ia32_cvtneps2bf16_*_mask builtins, removing them from the NYI fallthrough list |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
Related to: #167765