diff --git a/Engine/gapi/abstractgraphicsapi.h b/Engine/gapi/abstractgraphicsapi.h index 2aec2b6e..8be9cf2c 100644 --- a/Engine/gapi/abstractgraphicsapi.h +++ b/Engine/gapi/abstractgraphicsapi.h @@ -436,12 +436,12 @@ namespace Tempest { virtual void set (size_t id,AbstractGraphicsApi::Buffer* buf, size_t offset)=0; virtual void setTlas(size_t,AbstractGraphicsApi::AccelerationStructure*) {} virtual void set (size_t id, AbstractGraphicsApi::Texture** tex, size_t cnt, const Sampler2d& smp); - virtual void ssboBarriers(Detail::ResourceState& res) = 0; + virtual void ssboBarriers(Detail::ResourceState& res, PipelineStage st) = 0; }; struct EmptyDesc : Desc { void set(size_t,AbstractGraphicsApi::Texture*, const Sampler2d&, uint32_t){} void set(size_t,AbstractGraphicsApi::Buffer*, size_t){} - void ssboBarriers(Detail::ResourceState&){} + void ssboBarriers(Detail::ResourceState&,PipelineStage){} }; struct BarrierDesc { Buffer* buffer = nullptr; diff --git a/Engine/gapi/directx12/dxcommandbuffer.cpp b/Engine/gapi/directx12/dxcommandbuffer.cpp index 1aeff087..43de8e30 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.cpp +++ b/Engine/gapi/directx12/dxcommandbuffer.cpp @@ -59,9 +59,7 @@ static D3D12_RESOURCE_STATES nativeFormat(ResourceAccess f) { if((f&ResourceAccess::Uniform)==ResourceAccess::Uniform) st |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; - if((f&ResourceAccess::UavRead)==ResourceAccess::UavRead) - st |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - if((f&ResourceAccess::UavWrite)==ResourceAccess::UavWrite) + if((f&ResourceAccess::UavReadWriteAll)==ResourceAccess::None) st |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS; return D3D12_RESOURCE_STATES(st); @@ -439,7 +437,7 @@ void DxCommandBuffer::setUniforms(AbstractGraphicsApi::CompPipeline& /*p*/, Abst } void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) { - curUniforms->ssboBarriers(resState); + curUniforms->ssboBarriers(resState,PipelineStage::S_Compute); resState.flush(*this); impl->Dispatch(UINT(x),UINT(y),UINT(z)); } @@ -459,7 +457,7 @@ void DxCommandBuffer::setBytes(AbstractGraphicsApi::Pipeline& p, const void* dat } void DxCommandBuffer::setUniforms(AbstractGraphicsApi::Pipeline& /*p*/, AbstractGraphicsApi::Desc& u) { - u.ssboBarriers(resState); + u.ssboBarriers(resState,PipelineStage::S_Graphics); implSetUniforms(u,false); } @@ -535,7 +533,7 @@ void DxCommandBuffer::copy(AbstractGraphicsApi::Buffer& dstBuf, size_t offset, const UINT pitch = ((pitchBase+D3D12_TEXTURE_DATA_PITCH_ALIGNMENT-1)/D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)*D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; if(pitch==pitchBase && (offset%D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)==0) { - resState.onUavUsage(dst.nonUniqId,0); + resState.onUavUsage(dst.nonUniqId,0,PipelineStage::S_Tranfer); resState.setLayout(src,ResourceAccess::TransferSrc); resState.flush(*this); diff --git a/Engine/gapi/directx12/dxdescriptorarray.cpp b/Engine/gapi/directx12/dxdescriptorarray.cpp index 3f04119a..ac776f49 100644 --- a/Engine/gapi/directx12/dxdescriptorarray.cpp +++ b/Engine/gapi/directx12/dxdescriptorarray.cpp @@ -198,7 +198,7 @@ void DxDescriptorArray::setTlas(size_t id, AbstractGraphicsApi::AccelerationStru device.CreateShaderResourceView(nullptr,&desc,gpu); } -void DxDescriptorArray::ssboBarriers(ResourceState& res) { +void DxDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) { auto& lay = this->lay.handler->lay; if(T_UNLIKELY(uavUsage.durty)) { uavUsage.read = 0; @@ -216,7 +216,7 @@ void DxDescriptorArray::ssboBarriers(ResourceState& res) { } uavUsage.durty = false; } - res.onUavUsage(uavUsage); + res.onUavUsage(uavUsage,st); } #endif diff --git a/Engine/gapi/directx12/dxdescriptorarray.h b/Engine/gapi/directx12/dxdescriptorarray.h index 1634b1c5..b7604586 100644 --- a/Engine/gapi/directx12/dxdescriptorarray.h +++ b/Engine/gapi/directx12/dxdescriptorarray.h @@ -19,7 +19,7 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc { void set (size_t id, AbstractGraphicsApi::Texture *tex, const Sampler2d& smp, uint32_t mipLevel) override; void set (size_t id, AbstractGraphicsApi::Buffer* buf, size_t offset) override; void setTlas(size_t id, AbstractGraphicsApi::AccelerationStructure* tlas) override; - void ssboBarriers(Detail::ResourceState& res) override; + void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override; DSharedPtr lay; DxPipelineLay::PoolAllocation val; diff --git a/Engine/gapi/directx12/dxshader.cpp b/Engine/gapi/directx12/dxshader.cpp index 6a63d996..0de272f3 100644 --- a/Engine/gapi/directx12/dxshader.cpp +++ b/Engine/gapi/directx12/dxshader.cpp @@ -49,8 +49,8 @@ static const char* target(spv::ExecutionModel exec, uint32_t sm, char* buf) { } static int calcShaderModel(const spirv_cross::CompilerHLSL& comp) { - uint32_t shader_model = 50; - //uint32_t shader_model = 65; + //uint32_t shader_model = 50; + uint32_t shader_model = 65; for(auto& cap:comp.get_declared_capabilities()) { switch(cap) { case spv::CapabilityRayQueryKHR: diff --git a/Engine/gapi/directx12api.cpp b/Engine/gapi/directx12api.cpp index 4e30f8f2..581f278f 100644 --- a/Engine/gapi/directx12api.cpp +++ b/Engine/gapi/directx12api.cpp @@ -373,7 +373,7 @@ void DirectX12Api::readPixels(Device* d, Pixmap& out, const PTexture t, uint32_t row = bsz.w*uint32_t(bpb); const uint32_t pith = ((row+D3D12_TEXTURE_DATA_PITCH_ALIGNMENT-1)/D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)*D3D12_TEXTURE_DATA_PITCH_ALIGNMENT; Detail::DxBuffer stage = dx.allocator.alloc(nullptr,bsz.h,bsz.w*bpb,pith,MemUsage::TransferDst,BufferHeap::Readback); - ResourceAccess defLay = storageImg ? ResourceAccess::UavRead : ResourceAccess::Sampler; + ResourceAccess defLay = storageImg ? (ResourceAccess::UavReadGr | ResourceAccess::UavReadComp) : ResourceAccess::Sampler; auto cmd = dx.dataMgr().get(); cmd->begin(); diff --git a/Engine/gapi/flags.h b/Engine/gapi/flags.h index da045124..baf8b65e 100644 --- a/Engine/gapi/flags.h +++ b/Engine/gapi/flags.h @@ -46,9 +46,17 @@ enum ResourceAccess : uint32_t { Index = 1 << 7, Vertex = 1 << 8, Uniform = 1 << 9, - UavRead = 1 << 10, - UavWrite = 1 << 11, - UavReadWrite = (UavRead | UavWrite), + + UavReadComp = 1 << 10, + UavWriteComp = 1 << 11, + + UavReadGr = 1 << 12, + UavWriteGr = 1 << 13, + + // for debug view + UavReadWriteComp = (UavReadComp | UavWriteComp), + UavReadWriteGr = (UavReadGr | UavWriteGr ), + UavReadWriteAll = (UavReadWriteGr | UavReadWriteComp), }; inline ResourceAccess operator | (ResourceAccess a,const ResourceAccess& b) { @@ -59,4 +67,11 @@ inline ResourceAccess operator & (ResourceAccess a,const ResourceAccess& b) { return ResourceAccess(uint32_t(a)&uint32_t(b)); } +enum PipelineStage : uint8_t { + S_Tranfer, + S_Compute, + S_Graphics, + S_Count, + }; + } diff --git a/Engine/gapi/resourcestate.cpp b/Engine/gapi/resourcestate.cpp index 55aaec77..cdc61368 100644 --- a/Engine/gapi/resourcestate.cpp +++ b/Engine/gapi/resourcestate.cpp @@ -56,38 +56,45 @@ void ResourceState::forceLayout(AbstractGraphicsApi::Texture& img) { } } -void ResourceState::onUavUsage(uint64_t read, uint64_t write) { +void ResourceState::onUavUsage(uint32_t read, uint32_t write, PipelineStage st) { ResourceState::Usage uavUsage; uavUsage.read = read; uavUsage.write = write; - onUavUsage(uavUsage); + onUavUsage(uavUsage,st); } -void ResourceState::onUavUsage(const Usage& u) { - if((uavUsage.write & u.read) !=0 || - (uavUsage.write & u.write)!=0 ){ - // RaW, WaW barrier - needUavRBarrier = true; - needUavWBarrier = true; - uavUsage = u; - } - else if((uavUsage.read & u.write)!=0) { - // WaR barrier - needUavRBarrier = true; - uavUsage = u; - } - else { - uavUsage.read |= u.read; - uavUsage.write |= u.write; +void ResourceState::onUavUsage(const Usage& u, PipelineStage st) { + ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::None,ResourceAccess::UavReadComp, ResourceAccess::UavReadGr}; + ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::None,ResourceAccess::UavWriteComp,ResourceAccess::UavWriteGr}; + + for(PipelineStage p = PipelineStage::S_Tranfer; p imgState; - ResourceState::Usage uavUsage; - bool needUavRBarrier = false; - bool needUavWBarrier = false; + + ResourceState::Usage uavUsage[PipelineStage::S_Count] = {}; + ResourceAccess uavPrev = ResourceAccess::None; }; } diff --git a/Engine/gapi/vulkan/vcommandbuffer.cpp b/Engine/gapi/vulkan/vcommandbuffer.cpp index 70df0fd9..fb28a6e5 100644 --- a/Engine/gapi/vulkan/vcommandbuffer.cpp +++ b/Engine/gapi/vulkan/vcommandbuffer.cpp @@ -88,12 +88,22 @@ static void toStage(VkPipelineStageFlags2KHR& stage, VkAccessFlagBits2KHR& acces acc |= VK_ACCESS_UNIFORM_READ_BIT; } - if((rs&ResourceAccess::UavRead)==ResourceAccess::UavRead) { - ret |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + // memory barriers + if((rs&ResourceAccess::UavReadGr)==ResourceAccess::UavReadGr) { + ret |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; acc |= VK_ACCESS_SHADER_READ_BIT; } - if((rs&ResourceAccess::UavWrite)==ResourceAccess::UavWrite) { - ret |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + if((rs&ResourceAccess::UavWriteGr)==ResourceAccess::UavWriteGr) { + ret |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT; + acc |= VK_ACCESS_SHADER_WRITE_BIT; + } + + if((rs&ResourceAccess::UavReadComp)==ResourceAccess::UavReadComp) { + ret |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + acc |= VK_ACCESS_SHADER_READ_BIT; + } + if((rs&ResourceAccess::UavWriteComp)==ResourceAccess::UavWriteComp) { + ret |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; acc |= VK_ACCESS_SHADER_WRITE_BIT; } @@ -132,9 +142,7 @@ static VkImageLayout toLayout(ResourceAccess rs) { if((rs&ResourceAccess::Vertex)==ResourceAccess::Uniform) return VK_IMAGE_LAYOUT_GENERAL; - if((rs&ResourceAccess::UavRead)==ResourceAccess::UavRead) - return VK_IMAGE_LAYOUT_GENERAL; - if((rs&ResourceAccess::UavWrite)==ResourceAccess::UavWrite) + if((rs&ResourceAccess::UavReadWriteAll)!=ResourceAccess::None) return VK_IMAGE_LAYOUT_GENERAL; return VK_IMAGE_LAYOUT_UNDEFINED; @@ -355,7 +363,7 @@ void VCommandBuffer::setUniforms(AbstractGraphicsApi::Pipeline &p, AbstractGraph VPipeline& px=reinterpret_cast(p); VDescriptorArray& ux=reinterpret_cast(u); curUniforms = &ux; - curUniforms->ssboBarriers(resState); + curUniforms->ssboBarriers(resState,PipelineStage::S_Graphics); vkCmdBindDescriptorSets(impl,VK_PIPELINE_BIND_POINT_GRAPHICS, px.pipelineLayout,0, 1,&ux.desc, @@ -369,7 +377,7 @@ void VCommandBuffer::setComputePipeline(AbstractGraphicsApi::CompPipeline& p) { } void VCommandBuffer::dispatch(size_t x, size_t y, size_t z) { - curUniforms->ssboBarriers(resState); + curUniforms->ssboBarriers(resState,PipelineStage::S_Compute); resState.flush(*this); vkCmdDispatch(impl,uint32_t(x),uint32_t(y),uint32_t(z)); } @@ -667,7 +675,7 @@ void VCommandBuffer::buildTlas(VkAccelerationStructureKHR dest, void VCommandBuffer::copy(AbstractGraphicsApi::Buffer& dst, size_t offset, AbstractGraphicsApi::Texture& src, uint32_t width, uint32_t height, uint32_t mip) { auto& nDst = reinterpret_cast(dst); - resState.onUavUsage(nDst.nonUniqId,0); + resState.onUavUsage(nDst.nonUniqId,0,PipelineStage::S_Tranfer); resState.setLayout(src,ResourceAccess::TransferSrc); resState.flush(*this); @@ -776,9 +784,10 @@ void VCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_ info.bufferMemoryBarrierCount = bufCount; info.pImageMemoryBarriers = imgBarrier; info.imageMemoryBarrierCount = imgCount; - info.pMemoryBarriers = &memBarrier; - if(memBarrier.sType==VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR) + if(memBarrier.sType==VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR) { + info.pMemoryBarriers = &memBarrier; info.memoryBarrierCount++; + } vkCmdPipelineBarrier2(impl,&info); } diff --git a/Engine/gapi/vulkan/vdescriptorarray.cpp b/Engine/gapi/vulkan/vdescriptorarray.cpp index c50605cf..8140a7f7 100644 --- a/Engine/gapi/vulkan/vdescriptorarray.cpp +++ b/Engine/gapi/vulkan/vdescriptorarray.cpp @@ -230,7 +230,7 @@ void VDescriptorArray::set(size_t id, AbstractGraphicsApi::Texture** t, size_t c vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr); } -void VDescriptorArray::ssboBarriers(ResourceState& res) { +void VDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) { auto& lay = this->lay.handler->lay; if(T_UNLIKELY(uavUsage.durty)) { uavUsage.read = 0; @@ -248,7 +248,7 @@ void VDescriptorArray::ssboBarriers(ResourceState& res) { } uavUsage.durty = false; } - res.onUavUsage(uavUsage); + res.onUavUsage(uavUsage,st); } void VDescriptorArray::addPoolSize(VkDescriptorPoolSize *p, size_t &sz, uint32_t cnt, VkDescriptorType elt) { diff --git a/Engine/gapi/vulkan/vdescriptorarray.h b/Engine/gapi/vulkan/vdescriptorarray.h index 388b6d8a..6eb7b2fb 100644 --- a/Engine/gapi/vulkan/vdescriptorarray.h +++ b/Engine/gapi/vulkan/vdescriptorarray.h @@ -23,7 +23,7 @@ class VDescriptorArray : public AbstractGraphicsApi::Desc { void set (size_t id, AbstractGraphicsApi::Texture** tex, size_t cnt, const Sampler2d& smp) override; - void ssboBarriers(Detail::ResourceState& res) override; + void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override; VkDescriptorSet desc=VK_NULL_HANDLE; diff --git a/Engine/gapi/vulkanapi.cpp b/Engine/gapi/vulkanapi.cpp index d752134d..d0587390 100644 --- a/Engine/gapi/vulkanapi.cpp +++ b/Engine/gapi/vulkanapi.cpp @@ -224,7 +224,7 @@ void VulkanApi::readPixels(AbstractGraphicsApi::Device *d, Pixmap& out, const PT const size_t size = bsz.w*bsz.h*bpb; Detail::VBuffer stage = dx.allocator.alloc(nullptr,size,1,1,MemUsage::TransferDst,BufferHeap::Readback); - ResourceAccess defLay = storageImg ? ResourceAccess::UavRead : ResourceAccess::Sampler; + ResourceAccess defLay = storageImg ? (ResourceAccess::UavReadGr | ResourceAccess::UavReadComp) : ResourceAccess::Sampler; auto cmd = dx.dataMgr().get(); cmd->begin();