diff --git a/Engine/gapi/abstractgraphicsapi.h b/Engine/gapi/abstractgraphicsapi.h index faff9ee1..317c9928 100644 --- a/Engine/gapi/abstractgraphicsapi.h +++ b/Engine/gapi/abstractgraphicsapi.h @@ -585,6 +585,7 @@ namespace Tempest { virtual void dispatchMeshIndirect(const Buffer& indirect, size_t offset); virtual void dispatch(size_t x, size_t y, size_t z) = 0; + virtual void dispatchIndirect(const Buffer& indirect, size_t offset) = 0; }; using PBuffer = Detail::DSharedPtr; diff --git a/Engine/gapi/directx12/dxcommandbuffer.cpp b/Engine/gapi/directx12/dxcommandbuffer.cpp index cbced514..9d55f3b0 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.cpp +++ b/Engine/gapi/directx12/dxcommandbuffer.cpp @@ -590,6 +590,10 @@ void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) { impl->Dispatch(UINT(x),UINT(y),UINT(z)); } +void DxCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) { + throw std::runtime_error("dispatch indirect is not implemented for dx12"); + } + void DxCommandBuffer::setPipeline(Tempest::AbstractGraphicsApi::Pipeline& p) { DxPipeline& px = reinterpret_cast(p); pushBaseInstanceId = px.pushBaseInstanceId; @@ -617,14 +621,7 @@ void DxCommandBuffer::implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompu void DxCommandBuffer::restoreIndirect() { for(auto& i:indirectCmd) { - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = i; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - impl->ResourceBarrier(1, &barrier); + issueExplicitIndirectToCommonStateTransition(i); } indirectCmd.clear(); } @@ -743,21 +740,14 @@ void DxCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect, auto& sign = dev.drawIndirectSgn.get(); // block future writers - resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics); + resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect); //resState.flush(*this); if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) { indirectCmd.insert(ind.impl.get()); - - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = ind.impl.get(); - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - impl->ResourceBarrier(1, &barrier); + issueExplicitCommonToIndirectStateTransition(ind.impl.get()); } + impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0); } @@ -770,19 +760,11 @@ void DxCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& in auto& sign = dev.drawMeshIndirectSgn.get(); // block future writers - resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics); + resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect); if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) { indirectCmd.insert(ind.impl.get()); - - D3D12_RESOURCE_BARRIER barrier; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; - barrier.Transition.pResource = ind.impl.get(); - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - impl->ResourceBarrier(1, &barrier); + issueExplicitCommonToIndirectStateTransition(ind.impl.get()); } impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0); } @@ -987,6 +969,28 @@ void DxCommandBuffer::copyNative(AbstractGraphicsApi::Buffer& dstBuf, size_t off impl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); } +void DxCommandBuffer::issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter) +{ + D3D12_RESOURCE_BARRIER barrier; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = buf; + barrier.Transition.StateAfter = stateAfter; + barrier.Transition.StateBefore = stateBefore; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + impl->ResourceBarrier(1, &barrier); + } + +void DxCommandBuffer::issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf) +{ + issueExplicitResourceStateTransition(buf, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); + } + +void DxCommandBuffer::issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf) +{ + issueExplicitResourceStateTransition(buf, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_COMMON); + } + #endif diff --git a/Engine/gapi/directx12/dxcommandbuffer.h b/Engine/gapi/directx12/dxcommandbuffer.h index 0a724062..fa8f9a46 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.h +++ b/Engine/gapi/directx12/dxcommandbuffer.h @@ -71,6 +71,7 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer { void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void dispatch (size_t x, size_t y, size_t z) override; + void dispatchIndirect (const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override; @@ -133,6 +134,10 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer { void pushStage(Stage* cmd); void implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompute); void restoreIndirect(); + + void issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter); + void issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf); + void issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf); }; } diff --git a/Engine/gapi/directx12/dxdevice.cpp b/Engine/gapi/directx12/dxdevice.cpp index 27769132..e087e7fe 100644 --- a/Engine/gapi/directx12/dxdevice.cpp +++ b/Engine/gapi/directx12/dxdevice.cpp @@ -102,6 +102,10 @@ DxDevice::DxDevice(IDXGIAdapter1& adapter, const ApiEntry& dllApi) arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH; dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid(), reinterpret_cast(&drawMeshIndirectSgn))); } + + arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; + desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); + dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid(), reinterpret_cast(&dispatchIndirectSgn))); } allocator.setDevice(*this); diff --git a/Engine/gapi/directx12/dxdevice.h b/Engine/gapi/directx12/dxdevice.h index 252dc257..b75e812d 100644 --- a/Engine/gapi/directx12/dxdevice.h +++ b/Engine/gapi/directx12/dxdevice.h @@ -337,6 +337,7 @@ class DxDevice : public AbstractGraphicsApi::Device { ComPtr drawIndirectSgn; ComPtr drawMeshIndirectSgn; + ComPtr dispatchIndirectSgn; DxAllocator allocator; DxDescriptorAllocator descAlloc; diff --git a/Engine/gapi/flags.h b/Engine/gapi/flags.h index 2164ab58..0a1e4ddf 100644 --- a/Engine/gapi/flags.h +++ b/Engine/gapi/flags.h @@ -98,9 +98,10 @@ enum PipelineStage : uint8_t { S_RtAs, S_Compute, S_Graphics, + S_Indirect, S_First = S_Transfer, - S_Count = S_Graphics+1, + S_Count = S_Indirect+1, }; } diff --git a/Engine/gapi/metal/mtcommandbuffer.cpp b/Engine/gapi/metal/mtcommandbuffer.cpp index 13ec62b2..a248dee0 100644 --- a/Engine/gapi/metal/mtcommandbuffer.cpp +++ b/Engine/gapi/metal/mtcommandbuffer.cpp @@ -360,6 +360,11 @@ void MtCommandBuffer::dispatch(size_t x, size_t y, size_t z) { encComp->dispatchThreadgroups(MTL::Size(x,y,z), localSize); } +void MtCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) { + auto& ind = reinterpret_cast(indirect); + encComp->dispatchThreadgroups(ind.impl.get(), offset, localSize); + } + void MtCommandBuffer::implSetBytes(const void* bytes, size_t sz) { auto& mtl = curLay->bindPush; auto& l = curLay->pb; diff --git a/Engine/gapi/metal/mtcommandbuffer.h b/Engine/gapi/metal/mtcommandbuffer.h index 5c44da0e..4c06d35e 100644 --- a/Engine/gapi/metal/mtcommandbuffer.h +++ b/Engine/gapi/metal/mtcommandbuffer.h @@ -61,6 +61,7 @@ class MtCommandBuffer : public AbstractGraphicsApi::CommandBuffer { void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void dispatch (size_t x, size_t y, size_t z) override; + void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override; void generateMipmap(AbstractGraphicsApi::Texture& image, uint32_t texWidth, uint32_t texHeight, uint32_t mipLevels) override; diff --git a/Engine/gapi/resourcestate.cpp b/Engine/gapi/resourcestate.cpp index 140857ff..075cea61 100644 --- a/Engine/gapi/resourcestate.cpp +++ b/Engine/gapi/resourcestate.cpp @@ -75,8 +75,8 @@ void ResourceState::onUavUsage(NonUniqResId read, NonUniqResId write, PipelineSt } void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) { - const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr}; - const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr}; + const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr, ResourceAccess::Indirect}; + const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr, ResourceAccess::None}; const ResourceAccess hv = (host ? ResourceAccess::TransferHost : ResourceAccess::None); for(PipelineStage p = PipelineStage::S_First; p(indirect); + + curUniforms->ssboBarriers(resState, PipelineStage::S_Compute); + // block future writers + resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect); + resState.flush(*this); + + vkCmdDispatchIndirect(impl, ind.impl, VkDeviceSize(offset)); + } + void VCommandBuffer::setBytes(AbstractGraphicsApi::CompPipeline& p, const void* data, size_t size) { VCompPipeline& px=reinterpret_cast(p); assert(size<=px.pushSize); @@ -509,7 +520,7 @@ void VCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect, s const VBuffer& ind = reinterpret_cast(indirect); // block future writers - resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics); + resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect); //resState.flush(*this); vkCmdDrawIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0); } @@ -522,7 +533,7 @@ void VCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& ind const VBuffer& ind = reinterpret_cast(indirect); // block future writers - resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics); + resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect); //resState.flush(*this); device.vkCmdDrawMeshTasksIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0); } diff --git a/Engine/gapi/vulkan/vcommandbuffer.h b/Engine/gapi/vulkan/vcommandbuffer.h index 1ac296f0..21c868b5 100644 --- a/Engine/gapi/vulkan/vcommandbuffer.h +++ b/Engine/gapi/vulkan/vcommandbuffer.h @@ -74,6 +74,7 @@ class VCommandBuffer:public AbstractGraphicsApi::CommandBuffer { void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void dispatch (size_t x, size_t y, size_t z) override; + void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override; void barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override; diff --git a/Engine/graphics/encoder.cpp b/Engine/graphics/encoder.cpp index cdb093c3..37dbd318 100644 --- a/Engine/graphics/encoder.cpp +++ b/Engine/graphics/encoder.cpp @@ -198,6 +198,12 @@ void Encoder::dispatchThreads(Size sz) { dispatchThreads(size_t(sz.w), size_t(sz.h), 1); } +void Encoder::dispatchIndirect(const StorageBuffer& indirect, size_t offset) { + if (offset % 4 != 0) + throw std::system_error(Tempest::GraphicsErrc::InvalidStorageBuffer); + impl->dispatchIndirect(*indirect.impl.impl.handler, offset); + } + void Encoder::setFramebuffer(std::initializer_list rd, AttachmentDesc zd) { implSetFramebuffer(rd.begin(),rd.size(),&zd); } diff --git a/Engine/graphics/encoder.h b/Engine/graphics/encoder.h index c566a1ad..3b0f4855 100644 --- a/Engine/graphics/encoder.h +++ b/Engine/graphics/encoder.h @@ -94,6 +94,7 @@ class Encoder { void dispatchMeshThreads(Size sz); void dispatch(size_t x, size_t y=1, size_t z=1); + void dispatchIndirect(const StorageBuffer& indirect, size_t offset); void dispatchThreads(size_t x, size_t y=1, size_t z=1); void dispatchThreads(Size sz); diff --git a/Tests/tests/resourcestate_test.cpp b/Tests/tests/resourcestate_test.cpp index 62555f9c..12aabdcc 100644 --- a/Tests/tests/resourcestate_test.cpp +++ b/Tests/tests/resourcestate_test.cpp @@ -45,6 +45,8 @@ static std::string toString(ResourceAccess rs) { text << "RtAsRead | "; if((rs & ResourceAccess::RtAsWrite)==ResourceAccess::RtAsWrite) text << "RtAsWrite | "; + if ((rs & ResourceAccess::Indirect) == ResourceAccess::Indirect) + text << "Indirect | "; auto ret = text.str(); @@ -96,6 +98,7 @@ struct TestCommandBuffer : Tempest::AbstractGraphicsApi::CommandBuffer { void drawIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {} void dispatch (size_t x, size_t y, size_t z) override {} + void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {} }; void TestCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) { @@ -171,5 +174,33 @@ TEST(main, ResourceStateBlas) { rs.flush(cmd); } +TEST(main, ResourceStateIndirect) { + TestCommandBuffer cmd; + + ResourceState rs; + + rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute); + rs.flush(cmd); + + rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Indirect); + rs.flush(cmd); + } + +TEST(main, ResourceStateIndirectAndUAVWithSubsequentWriteAccess) { + TestCommandBuffer cmd; + + ResourceState rs; + + rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute); + rs.flush(cmd); + + rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Compute); + rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Indirect); + rs.flush(cmd); + + rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute); + rs.flush(cmd); +} +