-
Notifications
You must be signed in to change notification settings - Fork 16k
[SPIR-V] Add builtin/intrinsic for subgroup ballot #174862
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-spir-v Author: Joseph Huber (jhuber6) Changes: Summary: Full diff: https://github.com/llvm/llvm-project/pull/174862.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
index d2ef6f99a0502..495851ed1727a 100644
--- a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
+++ b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
@@ -21,3 +21,5 @@ def subgroup_local_invocation_id : SPIRVBuiltin<"uint32_t()", [NoThrow, Const]>;
def distance : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
def length : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
def smoothstep : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>;
+
+def subgroup_ballot : SPIRVBuiltin<"_ExtVector<4, uint32_t>(bool)", [NoThrow, Const]>;
diff --git a/clang/test/CodeGenSPIRV/Builtins/subgroup.c b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
new file mode 100644
index 0000000000000..2ae2013c3c23e
--- /dev/null
+++ b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -O1 -triple spirv64 -fsycl-is-device -x c++ %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -O1 -triple spirv64 -cl-std=CL3.0 -x cl %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -O1 -triple spirv32 -cl-std=CL3.0 -x cl %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK
+
+#if defined(__cplusplus)
+typedef bool _Bool;
+#endif
+typedef unsigned __attribute__((ext_vector_type(4))) int4;
+
+// CHECK: @{{.*}}test_subgroup_shuffle{{.*}}(
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: tail call <4 x i32> @llvm.spv.wave.ballot(i1 %i)
+[[clang::sycl_external]] int4 test_subgroup_shuffle(_Bool i) {
+ return __builtin_spirv_subgroup_ballot(i);
+}
diff --git a/clang/test/SemaSPIRV/BuiltIns/subgroup-errors.c b/clang/test/SemaSPIRV/BuiltIns/subgroup-errors.c
new file mode 100644
index 0000000000000..5ef9f499efd31
--- /dev/null
+++ b/clang/test/SemaSPIRV/BuiltIns/subgroup-errors.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -O1 -Wno-unused-value -triple spirv64 -fsycl-is-device -verify %s -o -
+// RUN: %clang_cc1 -O1 -Wno-unused-value -triple spirv64 -verify %s -cl-std=CL3.0 -x cl -o -
+// RUN: %clang_cc1 -O1 -Wno-unused-value -triple spirv32 -verify %s -cl-std=CL3.0 -x cl -o -
+
+typedef unsigned __attribute__((ext_vector_type(4))) int4;
+
+void ballot(_Bool c) {
+ int4 x;
+ x = __builtin_spirv_subgroup_ballot(c);
+ x = __builtin_spirv_subgroup_ballot(1);
+ x = __builtin_spirv_subgroup_ballot(x); // expected-error{{parameter of incompatible type}}
+ int y = __builtin_spirv_subgroup_ballot(c); // expected-error{{with an expression of incompatible type}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 402235ec7cd9c..51e4151c2fdae 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -118,6 +118,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_dot4add_i8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_dot4add_u8packed : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
+ def int_spv_wave_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index f991938c14dfe..1918f5701e3b7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -307,6 +307,8 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectWaveOpInst(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I, unsigned Opcode) const;
+ bool selectWaveBallot(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
bool selectWaveActiveCountBits(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
@@ -2710,6 +2712,17 @@ bool SPIRVInstructionSelector::selectWaveActiveCountBits(
return Result;
}
+bool SPIRVInstructionSelector::selectWaveBallot(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+
+ SPIRVType *IntTy = GR.getOrCreateSPIRVIntegerType(32, I, TII);
+ SPIRVType *BallotType = GR.getOrCreateSPIRVVectorType(IntTy, 4, I, TII);
+ Register BallotReg = MRI->createVirtualRegister(GR.getRegClass(BallotType));
+ return selectWaveOpInst(BallotReg, BallotType, I,
+ SPIRV::OpGroupNonUniformBallot);
+}
+
bool SPIRVInstructionSelector::selectWaveReduceMax(Register ResVReg,
const SPIRVType *ResType,
MachineInstr &I,
@@ -3797,6 +3810,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectExtInst(ResVReg, ResType, I, CL::u_clamp, GL::UClamp);
case Intrinsic::spv_sclamp:
return selectExtInst(ResVReg, ResType, I, CL::s_clamp, GL::SClamp);
+ case Intrinsic::spv_wave_ballot:
+ return selectWaveBallot(ResVReg, ResType, I);
case Intrinsic::spv_wave_active_countbits:
return selectWaveActiveCountBits(ResVReg, ResType, I);
case Intrinsic::spv_wave_all:
diff --git a/llvm/test/CodeGen/SPIRV/ballot.ll b/llvm/test/CodeGen/SPIRV/ballot.ll
new file mode 100644
index 0000000000000..3ca1a243feaea
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/ballot.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#ballot_type:]] = OpTypeVector %[[#uint]] 4
+; CHECK-DAG: %[[#bool:]] = OpTypeBool
+; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3
+
+; CHECK-LABEL: Begin function test_fun
+; CHECK: %[[#bexpr:]] = OpFunctionParameter %[[#bool]]
+define <4 x i32> @test_fun(i1 %expr) {
+entry:
+; CHECK: %[[#ballot:]] = OpGroupNonUniformBallot %[[#ballot_type]] %[[#scope]] %[[#bexpr]]
+ %0 = call <4 x i32> @llvm.spv.wave.ballot(i1 %expr)
+ ret <4 x i32> %0
+}
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
3d086a2 to
777aa9f
Compare
| def int_spv_subgroup_max_size : ClangBuiltin<"__builtin_spirv_subgroup_max_size">, | ||
| Intrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrWillReturn]>; | ||
| def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">, | ||
| DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this supposed to be a vector, or should it be i128?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From the documentation
Result Type must be a vector of four components of integer type scalar, whose Width operand is 32 and whose Signedness operand is 0.
Result is a set of bitfields where the first invocation is represented in the lowest bit of the first vector component and the last (up to the size of the scope) is the higher bit number of the last bitmask needed to represent all bits of the invocations in the scope restricted tangle.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
| Intrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrWillReturn]>; | ||
| def int_spv_subgroup_max_size : ClangBuiltin<"__builtin_spirv_subgroup_max_size">, | ||
| Intrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrWillReturn]>; | ||
| def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks very similar to the int_spv_wave_ballot intrinsic added in this PR. Can you confirm whether a new intrinsic is needed? https://github.com/llvm/llvm-project/pull/174638/changes#diff-acfd7e2e8e55491e92aa9258bb80a625c7c99cb446d54bb7d0e07320d3b8f677
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SPIR-V subgroups are a more general / less constrained concept than what DX considers waves (even though in almost all cases they end up being the same thing in practice; per the spec, what DX calls a Quad models a Subgroup, as does what DX calls a Wave). At a glance, it might be worth considering whether or not the DX-specific intrinsic can be expressed / implemented in terms of the subgroup one, and then removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They both emit the same OpGroupNonUniformBallot opcode right now; we might need to split them if they ever behave differently. I think DX can have a 128-bit wave size while all the hardware is 64 or less?
farzonl
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can reuse the work HLSL did in the SPIR-V instruction selector / SPIR-V intrinsics.
I'll update it, but will still need the clang flavored builtin. I'll use the existing intrinsic but define a new clang builtin, but I also question why we call these things |
Summary: This patch adds an LLVM intrinsic and lowering for a subgroup ballot and a corresponding clang builtin. This uses the already present support but provides it in a way accessible to other targets. With this and llvm#174655 we should be able to implement most of the basic functions, like shuffling, active masks, and reductions. More work will be needed for canonicalizing / exposing the SPIR-V functions, but these are the fundamental builtins I need.
|
Updated, now hopefully no one tells me that someone added something for #174655 already too |
That one you are reusing an existing intrinsic so it’s different. That pr is just adding clang target extensions. |
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: This patch adds an LLVM intrinsic and lowering for a subgroup ballot and a corresponding clang builtin. This uses the already present support but provides it in a way accessible to other targets. With this and llvm#174655 we should be able to implement most of the basic functions, like shuffling, active masks, and reductions. More work will be needed for canonicalizing / exposing the SPIR-V functions, but these are the fundamental builtins I need.
Summary: #174862 and #174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
…910) Summary: llvm/llvm-project#174862 and llvm/llvm-project#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary: llvm/llvm-project#174862 and llvm/llvm-project#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these. (cherry picked from commit 5c43243)
Summary: This patch adds an LLVM intrinsic and lowering for a subgroup ballot and a corresponding clang builtin. This uses the already present support but provides it in a way accessible to other targets. With this and llvm#174655 we should be able to implement most of the basic functions, like shuffling, active masks, and reductions. More work will be needed for canonicalizing / exposing the SPIR-V functions, but these are the fundamental builtins I need.
Summary: llvm#174862 and llvm#174655 provided the intrinsics required to get the fundamental operations working for these. This patch sets up the basic support (as far as I know). This should be the first step towards allowing SPIR-V to build things like the LLVM libc and the OpenMP Device Runtime Library. The implementations here are intentionally inefficient, such as not using the dedicated SPIR-V opcode for read firstlane. This is just to start and hopefully start testing things later. Would appreciate someone more familiar with the backend double-checking these.
Summary:
This patch adds an LLVM intrinsic and lowering for a subgroup ballot and
a corresponding clang builtin. This uses the already present support but
provides it in a way accessible to other targets. With this and
#174655 we should be able to implement
most of the basic functions, like shuffling, active masks, and
reductions. More work will be needed for canonicalizing / exposing the
SPIR-V functions, but these are the fundamental builtins I need.