-
Notifications
You must be signed in to change notification settings - Fork 15k
[DirectX] Teach DXILResourceAccess about cbuffers #164554
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
| #include "DirectX.h" | ||
| #include "llvm/ADT/SetVector.h" | ||
| #include "llvm/Analysis/DXILResource.h" | ||
| #include "llvm/Frontend/HLSL/HLSLResource.h" | ||
| #include "llvm/IR/BasicBlock.h" | ||
| #include "llvm/IR/Dominators.h" | ||
| #include "llvm/IR/IRBuilder.h" | ||
|
|
@@ -20,6 +21,7 @@ | |
| #include "llvm/IR/IntrinsicsDirectX.h" | ||
| #include "llvm/IR/User.h" | ||
| #include "llvm/InitializePasses.h" | ||
| #include "llvm/Support/FormatVariadic.h" | ||
| #include "llvm/Transforms/Utils/ValueMapper.h" | ||
|
|
||
| #define DEBUG_TYPE "dxil-resource-access" | ||
|
|
@@ -44,16 +46,28 @@ static Value *calculateGEPOffset(GetElementPtrInst *GEP, Value *PrevOffset, | |
| APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0); | ||
| if (GEP->accumulateConstantOffset(DL, ConstantOffset)) { | ||
| APInt Scaled = ConstantOffset.udiv(ScalarSize); | ||
| return ConstantInt::get(Type::getInt32Ty(GEP->getContext()), Scaled); | ||
| return ConstantInt::get(DL.getIndexType(GEP->getType()), Scaled); | ||
| } | ||
|
|
||
| auto IndexIt = GEP->idx_begin(); | ||
| assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && | ||
| "GEP is not indexing through pointer"); | ||
| ++IndexIt; | ||
| Value *Offset = *IndexIt; | ||
| assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); | ||
| return Offset; | ||
| unsigned NumIndices = GEP->getNumIndices(); | ||
|
|
||
| // If we have a single index we're indexing into a top level array. This | ||
| // generally only happens with cbuffers. | ||
| if (NumIndices == 1) | ||
| return *GEP->idx_begin(); | ||
|
|
||
| // If we have two indices, this should be a simple access through a pointer. | ||
| if (NumIndices == 2) { | ||
| auto IndexIt = GEP->idx_begin(); | ||
| assert(cast<ConstantInt>(IndexIt)->getZExtValue() == 0 && | ||
| "GEP is not indexing through pointer"); | ||
| ++IndexIt; | ||
| Value *Offset = *IndexIt; | ||
| assert(++IndexIt == GEP->idx_end() && "Too many indices in GEP"); | ||
| return Offset; | ||
| } | ||
|
|
||
| llvm_unreachable("Unhandled GEP structure for resource access"); | ||
| } | ||
|
|
||
| static void createTypedBufferStore(IntrinsicInst *II, StoreInst *SI, | ||
|
|
@@ -171,6 +185,123 @@ static void createRawLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset) { | |
| LI->replaceAllUsesWith(V); | ||
| } | ||
|
|
||
| namespace { | ||
| /// Helper for building a `load.cbufferrow` intrinsic given a simple type. | ||
| struct CBufferRowIntrin { | ||
| Intrinsic::ID IID; | ||
| Type *RetTy; | ||
| unsigned int EltSize; | ||
| unsigned int NumElts; | ||
|
|
||
| CBufferRowIntrin(const DataLayout &DL, Type *Ty) { | ||
| assert(Ty == Ty->getScalarType() && "Expected scalar type"); | ||
|
|
||
| switch (DL.getTypeSizeInBits(Ty)) { | ||
| case 16: | ||
| IID = Intrinsic::dx_resource_load_cbufferrow_8; | ||
| RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty); | ||
| EltSize = 2; | ||
| NumElts = 8; | ||
| break; | ||
| case 32: | ||
| IID = Intrinsic::dx_resource_load_cbufferrow_4; | ||
| RetTy = StructType::get(Ty, Ty, Ty, Ty); | ||
| EltSize = 4; | ||
| NumElts = 4; | ||
| break; | ||
| case 64: | ||
| IID = Intrinsic::dx_resource_load_cbufferrow_2; | ||
| RetTy = StructType::get(Ty, Ty); | ||
| EltSize = 8; | ||
| NumElts = 2; | ||
| break; | ||
| default: | ||
| llvm_unreachable("Only 16, 32, and 64 bit types supported"); | ||
| } | ||
| } | ||
| }; | ||
| } // namespace | ||
|
|
||
| static void createCBufferLoad(IntrinsicInst *II, LoadInst *LI, Value *Offset, | ||
| dxil::ResourceTypeInfo &RTI) { | ||
| const DataLayout &DL = LI->getDataLayout(); | ||
|
|
||
| Type *Ty = LI->getType(); | ||
| assert(!isa<StructType>(Ty) && "Structs not handled yet"); | ||
| CBufferRowIntrin Intrin(DL, Ty->getScalarType()); | ||
|
|
||
| StringRef Name = LI->getName(); | ||
| Value *Handle = II->getOperand(0); | ||
|
|
||
| IRBuilder<> Builder(LI); | ||
|
|
||
| ConstantInt *GlobalOffset = dyn_cast<ConstantInt>(II->getOperand(1)); | ||
| assert(GlobalOffset && "CBuffer getpointer index must be constant"); | ||
|
|
||
| unsigned int FixedOffset = GlobalOffset->getZExtValue(); | ||
| // If we have a further constant offset we can just fold it in to the fixed | ||
| // offset. | ||
| if (auto *ConstOffset = dyn_cast_if_present<ConstantInt>(Offset)) { | ||
| FixedOffset += ConstOffset->getZExtValue(); | ||
| Offset = nullptr; | ||
| } | ||
|
|
||
| Value *CurrentRow = ConstantInt::get( | ||
| Builder.getInt32Ty(), FixedOffset / hlsl::CBufferRowSizeInBytes); | ||
| unsigned int CurrentIndex = | ||
| (FixedOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize; | ||
|
|
||
| assert(!(CurrentIndex && Offset) && | ||
| "Dynamic indexing into elements of cbuffer rows is not supported"); | ||
| if (Offset) | ||
| CurrentRow = FixedOffset ? Builder.CreateAdd(CurrentRow, Offset) : Offset; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is |
||
|
|
||
| auto *CBufLoad = Builder.CreateIntrinsic( | ||
| Intrin.RetTy, Intrin.IID, {Handle, CurrentRow}, nullptr, Name + ".load"); | ||
| auto *Elt = | ||
| Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, Name + ".extract"); | ||
|
|
||
| unsigned int Remaining = | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming the only valid return values for Dl.getTypeSizeInBits(Ty) are 64,32, and 16. It looks like all possible values of Remaining are going to be zero. Was there something else causing this check to be here? |
||
| ((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1; | ||
| if (Remaining == 0) { | ||
| // We only have a single element, so we're done. | ||
| Value *Result = Elt; | ||
|
|
||
| // However, if we loaded a <1 x T>, then we need to adjust the type. | ||
| if (auto *VT = dyn_cast<FixedVectorType>(Ty)) { | ||
| assert(VT->getNumElements() == 1 && "Can't have multiple elements here"); | ||
| Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are poisonvalues being used here instead of something like undef? |
||
| Builder.getInt32(0), Name); | ||
| } | ||
| LI->replaceAllUsesWith(Result); | ||
| return; | ||
| } | ||
|
|
||
| // Walk each element and extract it, wrapping to new rows as needed. | ||
| SmallVector<Value *> Extracts{Elt}; | ||
| while (Remaining--) { | ||
| CurrentIndex %= Intrin.NumElts; | ||
|
|
||
| if (CurrentIndex == 0) { | ||
| CurrentRow = Builder.CreateAdd(CurrentRow, | ||
| ConstantInt::get(Builder.getInt32Ty(), 1)); | ||
| CBufLoad = Builder.CreateIntrinsic(Intrin.RetTy, Intrin.IID, | ||
| {Handle, CurrentRow}, nullptr, | ||
| Name + ".load"); | ||
| } | ||
|
|
||
| Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++}, | ||
| Name + ".extract")); | ||
| } | ||
|
|
||
| // Finally, we build up the original loaded value. | ||
| Value *Result = PoisonValue::get(Ty); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this be undef too? |
||
| for (int I = 0, E = Extracts.size(); I < E; ++I) | ||
| Result = Builder.CreateInsertElement( | ||
| Result, Extracts[I], Builder.getInt32(I), Name + formatv(".upto{}", I)); | ||
| LI->replaceAllUsesWith(Result); | ||
| } | ||
|
|
||
| static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, | ||
| dxil::ResourceTypeInfo &RTI) { | ||
| switch (RTI.getResourceKind()) { | ||
|
|
@@ -179,6 +310,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, | |
| case dxil::ResourceKind::RawBuffer: | ||
| case dxil::ResourceKind::StructuredBuffer: | ||
| return createRawLoad(II, LI, Offset); | ||
| case dxil::ResourceKind::CBuffer: | ||
| return createCBufferLoad(II, LI, Offset, RTI); | ||
| case dxil::ResourceKind::Texture1D: | ||
| case dxil::ResourceKind::Texture2D: | ||
| case dxil::ResourceKind::Texture2DMS: | ||
|
|
@@ -190,9 +323,8 @@ static void createLoadIntrinsic(IntrinsicInst *II, LoadInst *LI, Value *Offset, | |
| case dxil::ResourceKind::TextureCubeArray: | ||
| case dxil::ResourceKind::FeedbackTexture2D: | ||
| case dxil::ResourceKind::FeedbackTexture2DArray: | ||
| case dxil::ResourceKind::CBuffer: | ||
| case dxil::ResourceKind::TBuffer: | ||
| // TODO: handle these | ||
| reportFatalUsageError("Load not yet implemented for resource type"); | ||
| return; | ||
| case dxil::ResourceKind::Sampler: | ||
| case dxil::ResourceKind::RTAccelerationStructure: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| ; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s | ||
| ; | ||
| ; Tests for indexed types in dynamically indexed arrays in cbuffers. | ||
| ; | ||
| ; struct S { | ||
| ; float x[2]; | ||
| ; uint q; | ||
| ; }; | ||
| ; cbuffer CB : register(b0) { | ||
| ; uint32_t3 w[3]; // offset 0, size 12 (+4) * 3 | ||
| ; S v[3]; // offset 48, size 24 (+8) * 3 | ||
| ; } | ||
| %S = type <{ <{ [1 x <{ float, target("dx.Padding", 12) }>], float }>, i32 }> | ||
| %__cblayout_CB = type <{ | ||
| <{ | ||
| [2 x <{ <3 x i32>, target("dx.Padding", 4) }>], | ||
| <3 x i32> | ||
| }>, | ||
| target("dx.Padding", 4), | ||
| <{ | ||
| [2 x <{ %S, target("dx.Padding", 8) }>], %S | ||
| }> | ||
| }> | ||
|
|
||
| @CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison | ||
|
|
||
| ; CHECK: define void @f | ||
| define void @f(ptr %dst, i32 %idx) { | ||
| entry: | ||
| %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) | ||
| store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 | ||
|
|
||
| ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb | ||
| %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 | ||
|
|
||
| ;; w[2].z | ||
| ; | ||
| ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 2) | ||
| ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 2 | ||
| ; CHECK: store i32 [[X]], ptr %dst | ||
| %w_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) | ||
| %w_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %w_ptr, i32 40 | ||
| %w_load = load i32, ptr addrspace(2) %w_gep, align 4 | ||
| store i32 %w_load, ptr %dst, align 4 | ||
|
|
||
| ;; v[2].q | ||
| ; | ||
| ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 8) | ||
| ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1 | ||
| ; CHECK: [[PTR:%.*]] = getelementptr inbounds nuw i8, ptr %dst, i32 4 | ||
| ; CHECK: store i32 [[X]], ptr [[PTR]] | ||
| %v_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 48) | ||
| %v_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %v_ptr, i32 84 | ||
| %v_load = load i32, ptr addrspace(2) %v_gep, align 4 | ||
| %v.i = getelementptr inbounds nuw i8, ptr %dst, i32 4 | ||
| store i32 %v_load, ptr %v.i, align 4 | ||
|
|
||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| ; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s | ||
| ; | ||
| ; Test for when we have indices into both the array and the vector: ie, s[1][3] | ||
|
|
||
| ; cbuffer CB : register(b0) { | ||
| ; uint4 s[2]; // offset 0, size 16 * 2 | ||
| ; } | ||
| %__cblayout_CB = type <{ [2 x <4 x i32>] }> | ||
|
|
||
| @CB.cb = local_unnamed_addr global target("dx.CBuffer", %__cblayout_CB) poison | ||
|
|
||
| ; CHECK: define void @f | ||
| define void @f(ptr %dst) { | ||
| entry: | ||
| %CB.cb_h = tail call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefromimplicitbinding(i32 1, i32 0, i32 1, i32 0, ptr null) | ||
| store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 | ||
|
|
||
| ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb | ||
| %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 4 | ||
|
|
||
| ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) | ||
| ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 | ||
| ; CHECK: store i32 [[X]], ptr %dst | ||
| %i8_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) | ||
| %i8_gep = getelementptr inbounds nuw i8, ptr addrspace(2) %i8_ptr, i32 28 | ||
| %i8_vecext = load i32, ptr addrspace(2) %i8_gep, align 4 | ||
| store i32 %i8_vecext, ptr %dst, align 4 | ||
|
|
||
| ; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) | ||
| ; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 3 | ||
| ;; | ||
| ;; It would be nice to avoid the redundant vector creation here, but that's | ||
| ;; outside of the scope of this pass. | ||
| ;; | ||
| ; CHECK: [[X_VEC:%.*]] = insertelement <4 x i32> {{%.*}}, i32 [[X]], i32 3 | ||
| ; CHECK: [[X_EXT:%.*]] = extractelement <4 x i32> [[X_VEC]], i32 3 | ||
| ; CHECK: store i32 [[X_EXT]], ptr %dst | ||
| %typed_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) | ||
| %typed_gep = getelementptr <4 x i32>, ptr addrspace(2) %typed_ptr, i32 1 | ||
| %typed_load = load <4 x i32>, ptr addrspace(2) %typed_gep, align 16 | ||
| %typed_vecext = extractelement <4 x i32> %typed_load, i32 3 | ||
| store i32 %typed_vecext, ptr %dst, align 4 | ||
|
|
||
| ret void | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| ; RUN: opt -S -dxil-resource-access -mtriple=dxil %s | FileCheck %s | ||
|
|
||
| ; cbuffer CB : register(b0) { | ||
| ; float a1[3]; | ||
| ; } | ||
| %__cblayout_CB = type <{ [2 x <{ float, [12 x i8] }>], float }> | ||
|
|
||
| @CB.cb = global target("dx.CBuffer", %__cblayout_CB) poison | ||
|
|
||
| ; CHECK: define void @f | ||
| define void @f(ptr %dst) { | ||
| entry: | ||
| %CB.cb_h = call target("dx.CBuffer", %__cblayout_CB) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, ptr null) | ||
| store target("dx.CBuffer", %__cblayout_CB) %CB.cb_h, ptr @CB.cb, align 4 | ||
|
|
||
| ; CHECK: [[CB:%.*]] = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb | ||
| ; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", %__cblayout_CB) [[CB]], i32 1) | ||
| ; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0 | ||
| ; CHECK: store float [[X]], ptr %dst | ||
| %CB.cb = load target("dx.CBuffer", %__cblayout_CB), ptr @CB.cb, align 8 | ||
| %a1_ptr = call ptr addrspace(2) @llvm.dx.resource.getpointer(target("dx.CBuffer", %__cblayout_CB) %CB.cb, i32 0) | ||
| %a1_gep = getelementptr inbounds <{ [2 x <{ float, [12 x i8] }>], float }>, ptr addrspace(2) %a1_ptr, i32 0, i32 0, i32 1 | ||
| %a1 = load float, ptr addrspace(2) %a1_gep, align 4 | ||
| store float %a1, ptr %dst, align 32 | ||
|
|
||
| ret void | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since Offset is not optional would
dyn_cast_or_nullbe better here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dyn_cast_or_nullanddyn_cast_if_presentare the same thing. There are comments to the effect of or_null being deprecated from 2022, but it doesn't appear that anybody is really working towards that. I can switch to or_null for familiarity sake if you prefer.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll leave that up to you. I just had to look up
dyn_cast_if_presentbecause I have not seen it before. I had no ideadyn_cast_or_nullwas suggested to be deprecated.