diff --git a/Engine/gapi/directx12/dxcommandbuffer.cpp b/Engine/gapi/directx12/dxcommandbuffer.cpp
index 33b897f0..27186e18 100644
--- a/Engine/gapi/directx12/dxcommandbuffer.cpp
+++ b/Engine/gapi/directx12/dxcommandbuffer.cpp
@@ -592,7 +592,19 @@ void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
   }
 
 void DxCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
-  throw std::runtime_error("dispatch indirect is not implemented for dx12");
+  // Indirect compute dispatch: a single command is executed with its arguments read from `indirect` at byte `offset`.
+  const DxBuffer& ind = reinterpret_cast<const DxBuffer&>(indirect);
+  auto& sign = dev.dispatchIndirectSgn.get();
+
+  curUniforms->ssboBarriers(resState, PipelineStage::S_Compute);
+  // block future writers
+  resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
+  resState.flush(*this);
+
+  // move the argument buffer to indirect-argument access for the call, then restore UAV read/write access
+  barrier(indirect, ResourceAccess::UavReadWriteAll, ResourceAccess::Indirect);
+  impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);
+  barrier(indirect, ResourceAccess::Indirect, ResourceAccess::UavReadWriteAll);
   }
 
 void DxCommandBuffer::setPipeline(Tempest::AbstractGraphicsApi::Pipeline& p) {
diff --git a/Engine/gapi/resourcestate.cpp b/Engine/gapi/resourcestate.cpp
index 722b79c4..08b64d99 100644
--- a/Engine/gapi/resourcestate.cpp
+++ b/Engine/gapi/resourcestate.cpp
@@ -54,6 +54,14 @@ void ResourceState::setLayout(AbstractGraphicsApi::Texture& a, ResourceAccess la
   img.outdated = true;
   }
 
+// Records `lay` as the pending access of buffer `a` and marks its entry outdated; untracked buffers start from UavReadWriteAll.
+void ResourceState::setLayout(AbstractGraphicsApi::Buffer& a, ResourceAccess lay) {
+  ResourceAccess def = ResourceAccess::UavReadWriteAll;
+  BufState& buf = findBuf(&a,def);
+  buf.next = lay;
+  buf.outdated = true;
+  }
+
 void ResourceState::forceLayout(AbstractGraphicsApi::Texture& img) {
   for(auto& i:imgState) {
     if(i.sw==nullptr && i.id==0 && i.img==&img) {
@@ -115,6 +123,7 @@ void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) {
 void ResourceState::joinWriters(PipelineStage st) {
   ResourceState::Usage u = {NonUniqResId(-1), NonUniqResId::I_None, false};
   onUavUsage(u, st);
+  // uavDstBarrier = uavDstBarrier | ResourceAccess::Indirect;
   }
 
 void ResourceState::clearReaders() {
@@ -213,6 +222,22 @@ ResourceState::ImgState& ResourceState::findImg(AbstractGraphicsApi::Texture* im
   return imgState.back();
   }
 
+// Returns the tracked entry for `buf`; when none exists, appends one with last=def, next=UavRead, outdated=false.
+// The returned reference points into the bufState vector, so a later insertion may invalidate it.
+ResourceState::BufState& ResourceState::findBuf(AbstractGraphicsApi::Buffer* buf, ResourceAccess def) {
+  for(auto& i:bufState) {
+    if(i.buf==buf)
+      return i;
+    }
+  BufState s={};
+  s.buf = buf;
+  s.last = def;
+  s.next = ResourceAccess::UavRead;
+  s.outdated = false;
+  bufState.push_back(s);
+  return bufState.back();
+  }
+
 void ResourceState::emitBarriers(AbstractGraphicsApi::CommandBuffer& cmd, AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) {
   if(cnt==0)
     return;
diff --git a/Engine/gapi/resourcestate.h b/Engine/gapi/resourcestate.h
index 6cb4ae9d..535fb149 100644
--- a/Engine/gapi/resourcestate.h
+++ b/Engine/gapi/resourcestate.h
@@ -23,6 +23,7 @@ class ResourceState {
                     AbstractGraphicsApi::Swapchain** sw, const uint32_t* imgId);
     void setLayout (AbstractGraphicsApi::Swapchain& s, uint32_t id, ResourceAccess lay, bool discard);
     void setLayout (AbstractGraphicsApi::Texture& a, ResourceAccess lay, bool discard = false);
+    void setLayout (AbstractGraphicsApi::Buffer& a, ResourceAccess lay);
 
     void onTranferUsage(NonUniqResId read, NonUniqResId write, bool host);
     void onUavUsage (NonUniqResId read, NonUniqResId write, PipelineStage st);
@@ -47,11 +48,23 @@ class ResourceState {
       bool outdated = false;
       };
 
+    // Per-buffer tracking entry: `last` and `next` access, plus `outdated`, which setLayout(Buffer&) raises when it assigns `next`.
+    struct BufState {
+      AbstractGraphicsApi::Buffer* buf = nullptr;
+
+      ResourceAccess last = ResourceAccess::None;
+      ResourceAccess next = ResourceAccess::None;
+
+      bool outdated = false;
+      };
+
     void fillReads();
     ImgState& findImg(AbstractGraphicsApi::Texture* img, AbstractGraphicsApi::Swapchain* sw, uint32_t id, ResourceAccess def, bool discard);
+    BufState& findBuf(AbstractGraphicsApi::Buffer* buf, ResourceAccess def);
     void emitBarriers(AbstractGraphicsApi::CommandBuffer& cmd, AbstractGraphicsApi::BarrierDesc* desc, size_t cnt);
 
     std::vector<ImgState> imgState;
+    std::vector<BufState> bufState;
 
     struct Stage {
       NonUniqResId depend[PipelineStage::S_Count];
diff --git a/Tests/tests/gapi/directx_test.cpp b/Tests/tests/gapi/directx_test.cpp
index e07414d6..1f6c2fda 100644
--- a/Tests/tests/gapi/directx_test.cpp
+++ b/Tests/tests/gapi/directx_test.cpp
@@ -278,6 +278,12 @@ TEST(DirectX12Api,MeshShader) {
 #endif
   }
 
+TEST(DirectX12Api,DispathIndirect) {
+#if defined(_MSC_VER)
+  GapiTestCommon::DispathIndirect<DirectX12Api>();
+#endif
+  }
+
 TEST(DirectX12Api,SpirvDefect_Link) {
 #if defined(_MSC_VER)
   using namespace Tempest;
diff --git a/Tests/tests/gapi/gapi_test_common.h b/Tests/tests/gapi/gapi_test_common.h
index d404ac95..c8cc833b 100644
--- a/Tests/tests/gapi/gapi_test_common.h
+++ b/Tests/tests/gapi/gapi_test_common.h
@@ -2041,4 +2041,51 @@ void MeshComputePrototype(const char* outImg) {
       throw;
     }
   }
+
+// Runs shader/simple_test.comp.sprv through dispatchIndirect with a {3,1,1} argument buffer and expects output to equal input.
+template<class GraphicsApi>
+void DispathIndirect() {
+  using namespace Tempest;
+
+  try {
+    GraphicsApi api{ApiFlags::Validation};
+    Device device(api);
+
+    Vec4 inputCpu[3] = {Vec4(0,1,2,3),Vec4(4,5,6,7),Vec4(8,9,10,11)};
+
+    auto input = device.ssbo(inputCpu, sizeof(inputCpu));
+    auto output = device.ssbo(Uninitialized, sizeof(inputCpu));
+
+    auto cs = device.shader("shader/simple_test.comp.sprv");
+    auto pso = device.pipeline(cs);
+
+    auto ubo = device.descriptors(pso.layout());
+    ubo.set(0,input);
+    ubo.set(1,output);
+
+    IVec3 argCpu = {3, 1, 1};
+    auto arg = device.ssbo(&argCpu, sizeof(argCpu));
+    auto cmd = device.commandBuffer();
+    {
+      auto enc = cmd.startEncoding(device);
+      enc.setUniforms(pso,ubo);
+      enc.dispatchIndirect(arg, 0);
+    }
+
+    auto sync = device.fence();
+    device.submit(cmd,sync);
+    sync.wait();
+
+    Vec4 outputCpu[3] = {};
+    device.readBytes(output,outputCpu,sizeof(outputCpu));
+
+    for(size_t i=0; i<3; ++i)
+      EXPECT_EQ(outputCpu[i],inputCpu[i]);
+    }
+  catch(std::system_error& e) {
+    if(e.code()==Tempest::GraphicsErrc::NoDevice)
+      Log::d("Skipping graphics testcase: ", e.what()); else
+      throw;
+    }
+  }
 }
diff --git a/Tests/tests/gapi/metal_test.cpp b/Tests/tests/gapi/metal_test.cpp
index df642e5f..2beab4f8 100644
--- a/Tests/tests/gapi/metal_test.cpp
+++ b/Tests/tests/gapi/metal_test.cpp
@@ -241,3 +241,9 @@ TEST(MetalApi,DISABLED_MeshShader) {
   GapiTestCommon::MeshShader<MetalApi>("MetalApi_MeshShader.png");
 #endif
   }
+
+TEST(MetalApi,DispathIndirect) {
+#if defined(__OSX__)
+  GapiTestCommon::DispathIndirect<MetalApi>();
+#endif
+  }
diff --git a/Tests/tests/gapi/vulkan_test.cpp b/Tests/tests/gapi/vulkan_test.cpp
index a2663f1c..194051d0 100644
--- a/Tests/tests/gapi/vulkan_test.cpp
+++ b/Tests/tests/gapi/vulkan_test.cpp
@@ -293,3 +293,9 @@ TEST(VulkanApi,DISABLED_MeshComputePrototype) {
   GapiTestCommon::MeshComputePrototype<VulkanApi>("VulkanApi_MeshComputePrototype.png");
 #endif
   }
+
+TEST(VulkanApi,DispathIndirect) {
+#if !defined(__OSX__)
+  GapiTestCommon::DispathIndirect<VulkanApi>();
+#endif
+  }
diff --git a/Tests/tests/resourcestate_test.cpp b/Tests/tests/resourcestate_test.cpp
index 12aabdcc..28a52f20 100644
--- a/Tests/tests/resourcestate_test.cpp
+++ b/Tests/tests/resourcestate_test.cpp
@@ -131,6 +131,7 @@ TEST(main, ResourceStateJoin) {
     rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
     rs.flush(cmd);
 
+    rs.joinWriters(PipelineStage::S_Indirect);
     rs.joinWriters(PipelineStage::S_Graphics);
     rs.flush(cmd);
     }
@@ -139,10 +140,12 @@ TEST(main, ResourceStateJoin) {
     rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_RtAs);
     rs.flush(cmd);
 
+    rs.joinWriters(PipelineStage::S_Indirect);
     rs.joinWriters(PipelineStage::S_Graphics);
     rs.flush(cmd);
 
     rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Graphics);
+    rs.joinWriters(PipelineStage::S_Indirect);
     rs.joinWriters(PipelineStage::S_Graphics);
     rs.flush(cmd);
     }
@@ -187,20 +190,20 @@ TEST(main, ResourceStateIndirect) {
   }
 
 TEST(main, ResourceStateIndirectAndUAVWithSubsequentWriteAccess) {
-    TestCommandBuffer cmd;
+  TestCommandBuffer cmd;
 
-    ResourceState rs;
+  ResourceState rs;
 
-    rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
-    rs.flush(cmd);
+  rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
+  rs.flush(cmd);
 
-    rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Compute);
-    rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Indirect);
-    rs.flush(cmd);
+  rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Compute);
+  rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Indirect);
+  rs.flush(cmd);
 
-    rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
-    rs.flush(cmd);
-}
+  rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
+  rs.flush(cmd);
+  }
 
 
 