-
Notifications
You must be signed in to change notification settings - Fork 15k
[DirectX] Teach DXILResourceAccess about cbuffers #164554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This isn't reachable today but will come into play once we reorder passes for llvm#147352 and llvm#147351.
|
@llvm/pr-subscribers-backend-directx Author: Justin Bogner (bogner) ChangesThis isn't reachable today but will come into play once we reorder passes for #147352 and #147351. Patch is 41.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164554.diff 9 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
index 6579d3405cf39..e467e3c8a218c 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAccess.cpp
@@ -10,6 +10,7 @@
#include "DirectX.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DXILResource.h"
+#include "llvm/Frontend/HLSL/HLSLResource.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +21,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "dxil-resource-access"
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset,
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (GEP->accumulateConstantOffset(DL, ConstantOffset)) {
APInt Scaled = ConstantOffset.udiv(ScalarSize);
- return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled);
+ return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled);
}
- auto IndexIt = GEP->idx_begin();
- assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
- "GEP is not indexing through pointer");
- ++IndexIt;
- Value *Offset = *IndexIt;
- assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
- return Offset;
+ unsigned NumIndices = GEP->getNumIndices();
+
+ // If we have a single index we're indexing into a top level array. This
+ // generally only happens with cbuffers.
+ if (NumIndices == 1)
+ return *GEP->idx_begin();
+
+ // If we have two indices, this should be a simple access through a pointer.
+ if (NumIndices == 2) {
+ auto IndexIt = GEP->idx_begin();
+ assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 &&
+ "GEP is not indexing through pointer");
+ ++IndexIt;
+ Value *Offset = *IndexIt;
+ assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP");
+ return Offset;
+ }
+
+ llvm_unreachable("Unhandled GEP structure for resource access");
}
static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI,
@@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) {
LI->replaceAllUsesWith(V);
}
+namespace {
+/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
+struct CBufferRowIntrin {
+ Intrinsic::ID IID;
+ Type *RetTy;
+ unsigned int EltSize;
+ unsigned int NumElts;
+
+ CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
+ assert(Ty == Ty->getScalarType() && "Expected scalar type");
+
+ switch (DL.getTypeSizeInBits(Ty)) {
+ case 16:
+ IID = Intrinsic::dx_resource_load_cbufferrow_8;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
+ EltSize = 2;
+ NumElts = 8;
+ break;
+ case 32:
+ IID = Intrinsic::dx_resource_load_cbufferrow_4;
+ RetTy = StructType::get(Ty, Ty, Ty, Ty);
+ EltSize = 4;
+ NumElts = 4;
+ break;
+ case 64:
+ IID = Intrinsic::dx_resource_load_cbufferrow_2;
+ RetTy = StructType::get(Ty, Ty);
+ EltSize = 8;
+ NumElts = 2;
+ break;
+ default:
+ llvm_unreachable("Only 16, 32, and 64 bit types supported");
+ }
+ }
+};
+} // namespace
+
+static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset,
+ dxil::ResourceTypeInfo &RTI) {
+ const DataLayout &DL = LI->getDataLayout();
+
+ Type *Ty = LI->getType();
+ assert(!isa<StructType>(Ty) && "Structs not handled yet");
+ CBufferRowIntrin Intrin(DL, Ty->getScalarType());
+
+ StringRef Name = LI->getName();
+ Value *Handle = II->getOperand(0);
+
+ IRBuilder<> Builder(LI);
+
+ ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1));
+ assert(GlobalOffset && "CBuffer getpointer index must be constant");
+
+ unsigned int FixedOffset = GlobalOffset->getZExtValue();
+ // If we have a further constant offset we can just fold it in to the fixed
+ // offset.
+ if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) {
+ FixedOffset += ConstOffset->getZExtValue();
+ Offset = nullptr;
+ }
+
+ Value *CurrentRow = ConstantInt::get(
+ Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes);
+ unsigned int CurrentIndex =
+ (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
+
+ assert(!(CurrentIndex && Offset) &&
+ "Dynamic indexing into elements of cbuffer rows is not supported");
+ if (Offset)
+ CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset;
+
+ auto *CBufLoad = Builder.CreateIntrinsic(
+ Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load");
+ auto *Elt =
+ Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract");
+
+ unsigned int Remaining =
+ ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
+ if (Remaining == 0) {
+ // We only have a single element, so we're done.
+ Value *Result = Elt;
+
+ // However, if we loaded a <1 x T>, then we need to adjust the type.
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+ assert(VT->getNumElements() == 1 && "Can't have multiple elements here");
+ Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
+ Builder.getInt32(0), Name);
+ }
+ LI->replaceAllUsesWith(Result);
+ return;
+ }
+
+ // Walk each element and extract it, wrapping to new rows as needed.
+ SmallVector<Value *> Extracts{Elt};
+ while (Remaining--) {
+ CurrentIndex %= Intrin.NumElts;
+
+ if (CurrentIndex == 0) {
+ CurrentRow = Builder.CreateAdd(CurrentRow,
+ ConstantInt::get(Builder.getInt32Ty(), 1));
+ CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID,
+ {Handle, CurrentRow}, nullptr,
+ Name + ".load");
+ }
+
+ Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
+ Name + ".extract"));
+ }
+
+ // Finally, we build up the original loaded value.
+ Value *Result = PoisonValue::get(Ty);
+ for (int I = 0, E = Extracts.size(); I < E; ++I)
+ Result = Builder.CreateInsertElement(
+ Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I));
+ LI->replaceAllUsesWith(Result);
+}
+
static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
dxil::ResourceTypeInfo &RTI) {
switch (RTI.getResourceKind()) {
@@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::RawBuffer:
case dxil::ResourceKind::StructuredBuffer:
return createRawLoad(II, LI, Offset);
+ case dxil::ResourceKind::CBuffer:
+ return createCBufferLoad(II, LI, Offset, RTI);
case dxil::ResourceKind::Texture1D:
case dxil::ResourceKind::Texture2D:
case dxil::ResourceKind::Texture2DMS:
@@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset,
case dxil::ResourceKind::TextureCubeArray:
case dxil::ResourceKind::FeedbackTexture2D:
case dxil::ResourceKind::FeedbackTexture2DArray:
- case dxil::ResourceKind::CBuffer:
case dxil::ResourceKind::TBuffer:
- // TODO: handle these
+ reportFatalUsageError("Load not yet implemented for resource type");
return;
case dxil::ResourceKind::Sampler:
case dxil::ResourceKind::RTAccelerationStructure:
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
new file mode 100644
index 0000000000000..22fba8c1d5f8c
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-struct.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Tests for indexed types in dynamically indexed arrays in cbuffers.
+;
+; struct S {
+; float x[2];
+; uint q;
+; };
+; cbuffer CB : register(b0) {
+; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3
+; S v[3]; // offset 48, size 24 (+8) * 3
+; }
+%S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }>
+%__cblayout_CB = type <{
+ <{
+ [2 x <{ <3 x i32>, target("dx.Padding", 4) }>],
+ <3 x i32>
+ }>,
+ target("dx.Padding", 4),
+ <{
+ [2 x <{ %S, target("dx.Padding", 8) }>], %S
+ }>
+}>
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst, i32 %idx) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ;; w[2].z
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: store i32 [[X]], ptr %dst
+ %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40
+ %w_load = load i32, ptr addrspace(2) %w_gep, align 4
+ store i32 %w_load, ptr %dst, align 4
+
+ ;; v[2].q
+ ;
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ ; CHECK: store i32 [[X]], ptr [[PTR]]
+ %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84
+ %v_load = load i32, ptr addrspace(2) %v_gep, align 4
+ %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4
+ store i32 %v_load, ptr %v.i, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
new file mode 100644
index 0000000000000..cbd76fc00b813
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-of-vector.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+;
+; Test for when we have indices into both the array and the vector: ie, s[1][3]
+
+; cbuffer CB : register(b0) {
+; uint4 s[2]; // offset 0, size 16 * 2
+; }
+%__cblayout_CB = type <{ [2 x <4 x i32>] }>
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ; CHECK: store i32 [[X]], ptr %dst
+ %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28
+ %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4
+ store i32 %i8_vecext, ptr %dst, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ;;
+ ;; It would be nice to avoid the redundant vector creation here, but that's
+ ;; outside of the scope of this pass.
+ ;;
+ ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3
+ ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3
+ ; CHECK: store i32 [[X_EXT]], ptr %dst
+ %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1
+ %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16
+ %typed_vecext = extractelement <4 x i32> %typed_load, i32 3
+ store i32 %typed_vecext, ptr %dst, align 4
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
new file mode 100644
index 0000000000000..a561d8c0aea5d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-array-typedgep.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+; float a1[3];
+; }
+%__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }>
+
+@CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: store float [[X]], ptr %dst
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8
+ %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1
+ %a1 = load float, ptr addrspace(2) %a1_gep, align 4
+ store float %a1, ptr %dst, align 32
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll
new file mode 100644
index 0000000000000..68dfbb184083f
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-arrays.ll
@@ -0,0 +1,129 @@
+; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s
+
+; cbuffer CB : register(b0) {
+; float a1[3]; // offset 0, size 4 (+12) * 3
+; double3 a2[2]; // offset 48, size 24 (+8) * 2
+; float16_t a3[2][2]; // offset 112, size 2 (+14) * 4
+; uint64_t a4[3]; // offset 176, size 8 (+8) * 3
+; int4 a5[2][3][4]; // offset 224, size 16 * 24
+; uint16_t a6[1]; // offset 608, size 2 (+14) * 1
+; int64_t a7[2]; // offset 624, size 8 (+8) * 2
+; bool a8[4]; // offset 656, size 4 (+12) * 4
+; }
+%__cblayout_CB = type <{
+ <{ [2 x <{ float, target("dx.Padding", 12) }>], float }>, target("dx.Padding", 12),
+ <{ [1 x <{ <3 x double>, target("dx.Padding", 8) }>], <3 x double> }>, target("dx.Padding", 8),
+ <{ [3 x <{ half, target("dx.Padding", 14) }>], half }>, target("dx.Padding", 14),
+ <{ [2 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+ [24 x <4 x i32>],
+ [1 x i16], target("dx.Padding", 14),
+ <{ [1 x <{ i64, target("dx.Padding", 8) }>], i64 }>, target("dx.Padding", 8),
+ <{ [3 x <{ i32, target("dx.Padding", 12) }>], i32 }>
+}>
+
+@CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison
+
+; CHECK: define void @f
+define void @f(ptr %dst) {
+entry:
+ %CB.cb_h.i.i = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null)
+ store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h.i.i, ptr @CB.cb, align 4
+
+ ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb
+ %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4
+
+ ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1)
+ ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
+ ; CHECK: store float [[X]], ptr %dst
+ %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0)
+ %a1_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a1_ptr, i32 16
+ %a1 = load float, ptr addrspace(2) %a1_gep, align 4
+ store float %a1, ptr %dst, align 32
+
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 5)
+ ; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
+ ; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 6)
+ ; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
+ ; CHECK: [[VEC0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <3 x double> [[VEC0]], double [[Y]], i32 1
+ ; CHECK: [[VEC2:%.*]] = insertelement <3 x double> [[VEC1]], double [[Z]], i32 2
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ ; CHECK: store <3 x double> [[VEC2]], ptr [[PTR]]
+ %a2_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48)
+ %a2_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a2_ptr, i32 32
+ %a2 = load <3 x double>, ptr addrspace(2) %a2_gep, align 8
+ %a2.i = getelementptr inbounds nuw i8, ptr %dst, i32 8
+ store <3 x double> %a2, ptr %a2.i, align 32
+
+ ; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8)
+ ; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 32
+ ; CHECK: store half [[X]], ptr [[PTR]]
+ %a3_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 112)
+ %a3_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a3_ptr, i32 16
+ %a3 = load half, ptr addrspace(2) %a3_gep, align 2
+ %a3.i = getelementptr inbounds nuw i8, ptr %dst, i32 32
+ store half %a3, ptr %a3.i, align 2
+
+ ; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 12)
+ ; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
+ ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ ; CHECK: store i64 [[X]], ptr [[PTR]]
+ %a4_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 176)
+ %a4_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %a4_ptr, i32 16
+ %a4 = load i64, ptr addrspace(2) %a4_gep, align 8
+ %a4.i = getelementptr inbounds nuw i8, ptr %dst, i32 40
+ store i64 %a4, ptr %a4.i, align 8
+
+ ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 26)
+ ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
+ ; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
+ ; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2
+ ; CHECK: [[A:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3
+ ; CHECK: [[VEC0:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
+ ; CHECK: [[VEC1:%.*]] = insertelement <4 x i...
[truncated]
|
llvm/test/CodeGen/DirectX/ResourceAccess/load-cbuffer-vectors.ll
Outdated
Show resolved
Hide resolved
| ; uint s[10]; // offset 0, size 4 (+12) * 10 | ||
| ; uint t[10]; // offset 160, size 4 (+12) * 10 | ||
| ; } | ||
| %__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
__cblayout_CB is missing the t[10]:
| %__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> | |
| %__cblayout_CB = type <{ <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }>, <{ [9 x <{ i32, target("dx.Padding", 12) }>], i32 }> }> |
And consider making t[10] different type than s[10] with different padding size.
| %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 | ||
| store i32 %i8_vecext, ptr %dst, align 4 | ||
|
|
||
| ;; s[1].z |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ;; s[1].z | |
| ;; s[1].w |
The code is accessing 4th element of the s[1] vector (here and in the second case below as well), and the test description also mentions s[1][3].
| unsigned int FixedOffset = GlobalOffset->getZExtValue(); | ||
| // If we have a further constant offset we can just fold it in to the fixed | ||
| // offset. | ||
| if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since Offset is not optional would dyn_cast_or_null be better here?
| if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) { | |
| if (auto *ConstOffset = dyn_cast_or_null<ConstantInt>(Offset)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dyn_cast_or_null and dyn_cast_if_present are the same thing. There are comments to the effect of or_null being deprecated from 2022, but it doesn't appear that anybody is really working towards that. I can switch to or_null for familiarity sake if you prefer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll leave that up to you. I just had to look up dyn_cast_if_present because I have not seen it before. I had no idea dyn_cast_or_null was suggested to be deprecated.
| assert(!(CurrentIndex && Offset) && | ||
| "Dynamic indexing into elements of cbuffer rows is not supported"); | ||
| if (Offset) | ||
| CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is Offset in bytes or rows? Is it being added to CurrentRow here, but on line 245 is it added to FixedOffset, which is in bytes.
| @@ -0,0 +1,64 @@ | |||
| ; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did not review this file because it is XFAIL and I think the GEPs are likely going to be updated.
Co-authored-by: Helena Kotas <[email protected]>
This isn't reachable today but will come into play once we reorder passes for #147352 and #147351.
Note that the
CBufferRowIntrinhelper struct is copied from theDXILCBufferAccesspass, but it will be removed from there when we simplify that pass in #147351