Skip to content

Commit

Permalink
implemented indirect dispatch (#66)
Browse files Browse the repository at this point in the history
* implemented indirect dispatch

implemented indirect dispatch

* work on review comments

-added tests for indirect barriers
-added explicit state transition to and from indirect on dx12
-added indirect pipeline stage in ResourceState for placing stronger barriers on vulkan when using indirect

* redundant spaces

redundant spaces

* clean .gitignore from local files

* added actual usage of indirect stage

* work on review comments

-added notImplemented runtime exception in indirectDispatch on dx12
-added indirect resource access in barrier test output
-added test for a situation when a buffer is bound as a UAV and Indirect simultaneously

* build fix

-build fix on metal
-expanded ResourceStateIndirectAndUAV test by adding a subsequent write access
  • Loading branch information
KirillAlekseeenko authored Jul 6, 2024
1 parent b81b743 commit 35ebfc4
Show file tree
Hide file tree
Showing 14 changed files with 105 additions and 35 deletions.
1 change: 1 addition & 0 deletions Engine/gapi/abstractgraphicsapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ namespace Tempest {
virtual void dispatchMeshIndirect(const Buffer& indirect, size_t offset);

virtual void dispatch(size_t x, size_t y, size_t z) = 0;
virtual void dispatchIndirect(const Buffer& indirect, size_t offset) = 0;
};

using PBuffer = Detail::DSharedPtr<Buffer*>;
Expand Down
60 changes: 32 additions & 28 deletions Engine/gapi/directx12/dxcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,10 @@ void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
impl->Dispatch(UINT(x),UINT(y),UINT(z));
}

// Indirect compute dispatch for the DirectX 12 backend.
// Not supported here yet: both arguments are ignored and the call
// unconditionally throws std::runtime_error.
void DxCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
  // The parameters are intentionally unused until the feature is implemented.
  (void)indirect;
  (void)offset;

  const char* const reason = "dispatch indirect is not implemented for dx12";
  throw std::runtime_error(reason);
}

void DxCommandBuffer::setPipeline(Tempest::AbstractGraphicsApi::Pipeline& p) {
DxPipeline& px = reinterpret_cast<DxPipeline&>(p);
pushBaseInstanceId = px.pushBaseInstanceId;
Expand Down Expand Up @@ -617,14 +621,7 @@ void DxCommandBuffer::implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompu

// Moves every resource recorded in `indirectCmd` from the
// INDIRECT_ARGUMENT state back to COMMON, then forgets them.
//
// Exactly one transition barrier is issued per resource, through the shared
// helper. Issuing the same INDIRECT_ARGUMENT -> COMMON transition a second
// time would use a StateBefore that no longer matches the resource's state.
void DxCommandBuffer::restoreIndirect() {
  for(auto& i:indirectCmd) {
    issueExplicitIndirectToCommonStateTransition(i);
  }
  indirectCmd.clear();
}
Expand Down Expand Up @@ -743,21 +740,14 @@ void DxCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect,
auto& sign = dev.drawIndirectSgn.get();

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);

if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) {
indirectCmd.insert(ind.impl.get());

D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = ind.impl.get();
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
issueExplicitCommonToIndirectStateTransition(ind.impl.get());
}

impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);
}

Expand All @@ -770,19 +760,11 @@ void DxCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& in
auto& sign = dev.drawMeshIndirectSgn.get();

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);

if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) {
indirectCmd.insert(ind.impl.get());

D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = ind.impl.get();
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
issueExplicitCommonToIndirectStateTransition(ind.impl.get());
}
impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);
}
Expand Down Expand Up @@ -987,6 +969,28 @@ void DxCommandBuffer::copyNative(AbstractGraphicsApi::Buffer& dstBuf, size_t off
impl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
}

// Submits a single transition barrier through impl->ResourceBarrier that
// moves all subresources of `buf` from `stateBefore` to `stateAfter`.
void DxCommandBuffer::issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter)
{
  D3D12_RESOURCE_BARRIER transition = {};
  transition.Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  transition.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;

  // Fill the transition payload: which resource, which subresources,
  // and the state pair to move between.
  auto& payload = transition.Transition;
  payload.pResource   = buf;
  payload.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
  payload.StateBefore = stateBefore;
  payload.StateAfter  = stateAfter;

  impl->ResourceBarrier(1, &transition);
}

// Convenience wrapper: transitions `buf` from COMMON to INDIRECT_ARGUMENT.
void DxCommandBuffer::issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf)
{
  const D3D12_RESOURCE_STATES from = D3D12_RESOURCE_STATE_COMMON;
  const D3D12_RESOURCE_STATES to   = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
  issueExplicitResourceStateTransition(buf, from, to);
}

// Convenience wrapper: transitions `buf` from INDIRECT_ARGUMENT back to COMMON.
void DxCommandBuffer::issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf)
{
  const D3D12_RESOURCE_STATES from = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
  const D3D12_RESOURCE_STATES to   = D3D12_RESOURCE_STATE_COMMON;
  issueExplicitResourceStateTransition(buf, from, to);
}

#endif


5 changes: 5 additions & 0 deletions Engine/gapi/directx12/dxcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect (const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;

Expand Down Expand Up @@ -133,6 +134,10 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void pushStage(Stage* cmd);
void implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompute);
void restoreIndirect();

void issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter);
void issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf);
void issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf);
};

}
Expand Down
4 changes: 4 additions & 0 deletions Engine/gapi/directx12/dxdevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ DxDevice::DxDevice(IDXGIAdapter1& adapter, const ApiEntry& dllApi)
arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH;
dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid<ID3D12CommandSignature>(), reinterpret_cast<void**>(&drawMeshIndirectSgn)));
}

arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH;
desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid<ID3D12CommandSignature>(), reinterpret_cast<void**>(&dispatchIndirectSgn)));
}

allocator.setDevice(*this);
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/directx12/dxdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ class DxDevice : public AbstractGraphicsApi::Device {

ComPtr<ID3D12CommandSignature> drawIndirectSgn;
ComPtr<ID3D12CommandSignature> drawMeshIndirectSgn;
ComPtr<ID3D12CommandSignature> dispatchIndirectSgn;

DxAllocator allocator;
DxDescriptorAllocator descAlloc;
Expand Down
3 changes: 2 additions & 1 deletion Engine/gapi/flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ enum PipelineStage : uint8_t {
S_RtAs,
S_Compute,
S_Graphics,
S_Indirect,

S_First = S_Transfer,
S_Count = S_Graphics+1,
S_Count = S_Indirect+1,
};

}
5 changes: 5 additions & 0 deletions Engine/gapi/metal/mtcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@ void MtCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
encComp->dispatchThreadgroups(MTL::Size(x,y,z), localSize);
}

// Indirect compute dispatch for the Metal backend.
// Forwards the native handle of `indirect`, the byte `offset` and the
// current `localSize` to the compute encoder's indirect dispatchThreadgroups.
void MtCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
  // The abstract buffer is viewed as this backend's concrete buffer type.
  const MtBuffer& argBuffer = reinterpret_cast<const MtBuffer&>(indirect);

  encComp->dispatchThreadgroups(argBuffer.impl.get(),
                                offset,
                                localSize);
}

void MtCommandBuffer::implSetBytes(const void* bytes, size_t sz) {
auto& mtl = curLay->bindPush;
auto& l = curLay->pb;
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/metal/mtcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class MtCommandBuffer : public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;
void generateMipmap(AbstractGraphicsApi::Texture& image, uint32_t texWidth, uint32_t texHeight, uint32_t mipLevels) override;
Expand Down
6 changes: 2 additions & 4 deletions Engine/gapi/resourcestate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ void ResourceState::onUavUsage(NonUniqResId read, NonUniqResId write, PipelineSt
}

void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) {
const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr};
const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr};
const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr, ResourceAccess::Indirect};
const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr, ResourceAccess::None};
const ResourceAccess hv = (host ? ResourceAccess::TransferHost : ResourceAccess::None);

for(PipelineStage p = PipelineStage::S_First; p<PipelineStage::S_Count; p = PipelineStage(p+1)) {
Expand All @@ -86,8 +86,6 @@ void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) {
uavSrcBarrier = uavSrcBarrier | rd[p] | wr[p];
uavDstBarrier = uavDstBarrier | rd[st] | wr[st];

uavDstBarrier = uavDstBarrier | ResourceAccess::Indirect;

uavRead [st].depend[p] = NonUniqResId::I_None;
uavWrite[st].depend[p] = NonUniqResId::I_None;
}
Expand Down
15 changes: 13 additions & 2 deletions Engine/gapi/vulkan/vcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,17 @@ void VCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
vkCmdDispatch(impl,uint32_t(x),uint32_t(y),uint32_t(z));
}

// Indirect compute dispatch for the Vulkan backend.
// Registers barriers for the bound uniforms (compute stage) and for the
// argument buffer (indirect stage), flushes them, then records
// vkCmdDispatchIndirect reading its arguments at `offset` in `indirect`.
void VCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
  const auto& argBuffer = reinterpret_cast<const VBuffer&>(indirect);

  // Resources bound through the current uniforms are used by the compute stage.
  curUniforms->ssboBarriers(resState, PipelineStage::S_Compute);

  // Record the argument buffer as read by the indirect stage, so that
  // future writers are blocked.
  resState.onUavUsage(argBuffer.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);

  // Pending barriers are flushed before the dispatch itself is recorded.
  resState.flush(*this);

  const VkDeviceSize byteOffset = VkDeviceSize(offset);
  vkCmdDispatchIndirect(impl, argBuffer.impl, byteOffset);
}

void VCommandBuffer::setBytes(AbstractGraphicsApi::CompPipeline& p, const void* data, size_t size) {
VCompPipeline& px=reinterpret_cast<VCompPipeline&>(p);
assert(size<=px.pushSize);
Expand Down Expand Up @@ -509,7 +520,7 @@ void VCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect, s
const VBuffer& ind = reinterpret_cast<const VBuffer&>(indirect);

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);
vkCmdDrawIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0);
}
Expand All @@ -522,7 +533,7 @@ void VCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& ind
const VBuffer& ind = reinterpret_cast<const VBuffer&>(indirect);

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);
device.vkCmdDrawMeshTasksIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0);
}
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/vulkan/vcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class VCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;

Expand Down
6 changes: 6 additions & 0 deletions Engine/graphics/encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ void Encoder<Tempest::CommandBuffer>::dispatchThreads(Size sz) {
dispatchThreads(size_t(sz.w), size_t(sz.h), 1);
}

// Records an indirect compute dispatch whose arguments are read from
// `indirect` starting at byte `offset`.
// Throws std::system_error(GraphicsErrc::InvalidStorageBuffer) when `offset`
// is not a multiple of 4.
void Encoder<CommandBuffer>::dispatchIndirect(const StorageBuffer& indirect, size_t offset) {
  const bool isAligned = (offset % 4) == 0;
  if(!isAligned) {
    throw std::system_error(Tempest::GraphicsErrc::InvalidStorageBuffer);
  }

  impl->dispatchIndirect(*indirect.impl.impl.handler, offset);
}

// Convenience overload: unpacks the initializer list into a pointer/count
// pair and forwards it, together with the address of `zd`, to
// implSetFramebuffer.
void Encoder<CommandBuffer>::setFramebuffer(std::initializer_list<AttachmentDesc> rd, AttachmentDesc zd) {
  const AttachmentDesc* first = rd.begin();
  const size_t          count = rd.size();
  implSetFramebuffer(first, count, &zd);
}
Expand Down
1 change: 1 addition & 0 deletions Engine/graphics/encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Encoder<Tempest::CommandBuffer> {
void dispatchMeshThreads(Size sz);

void dispatch(size_t x, size_t y=1, size_t z=1);
void dispatchIndirect(const StorageBuffer& indirect, size_t offset);
void dispatchThreads(size_t x, size_t y=1, size_t z=1);
void dispatchThreads(Size sz);

Expand Down
31 changes: 31 additions & 0 deletions Tests/tests/resourcestate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ static std::string toString(ResourceAccess rs) {
text << "RtAsRead | ";
if((rs & ResourceAccess::RtAsWrite)==ResourceAccess::RtAsWrite)
text << "RtAsWrite | ";
if ((rs & ResourceAccess::Indirect) == ResourceAccess::Indirect)
text << "Indirect | ";


auto ret = text.str();
Expand Down Expand Up @@ -96,6 +98,7 @@ struct TestCommandBuffer : Tempest::AbstractGraphicsApi::CommandBuffer {
void drawIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {}

void dispatch (size_t x, size_t y, size_t z) override {}
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {}
};

void TestCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) {
Expand Down Expand Up @@ -171,5 +174,33 @@ TEST(main, ResourceStateBlas) {
rs.flush(cmd);
}

// A compute-stage write to a resource followed by an indirect-stage read of
// the same resource; barriers are flushed into the test command buffer
// after each step.
TEST(main, ResourceStateIndirect) {
  const NonUniqResId none   = NonUniqResId::I_None;
  const NonUniqResId buffer = NonUniqResId(0x1);

  TestCommandBuffer cmd;
  ResourceState     rs;

  // Step 1: compute stage writes the buffer.
  rs.onUavUsage(none, buffer, PipelineStage::S_Compute);
  rs.flush(cmd);

  // Step 2: the same buffer is read by the indirect stage.
  rs.onUavUsage(buffer, none, PipelineStage::S_Indirect);
  rs.flush(cmd);
}

// A resource is written by compute, then read by both the compute and the
// indirect stage before a single flush, and finally written by compute
// again; barriers are flushed into the test command buffer along the way.
TEST(main, ResourceStateIndirectAndUAVWithSubsequentWriteAccess) {
  const NonUniqResId none   = NonUniqResId::I_None;
  const NonUniqResId buffer = NonUniqResId(0x1);

  TestCommandBuffer cmd;
  ResourceState     rs;

  // Step 1: initial compute write.
  rs.onUavUsage(none, buffer, PipelineStage::S_Compute);
  rs.flush(cmd);

  // Step 2: read from the compute stage and from the indirect stage,
  // both registered before one flush.
  rs.onUavUsage(buffer, none, PipelineStage::S_Compute);
  rs.onUavUsage(buffer, none, PipelineStage::S_Indirect);
  rs.flush(cmd);

  // Step 3: subsequent compute write to the same resource.
  rs.onUavUsage(none, buffer, PipelineStage::S_Compute);
  rs.flush(cmd);
}



0 comments on commit 35ebfc4

Please sign in to comment.