Skip to content

Commit

Permalink
UAV tracker in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
Try committed Jun 1, 2022
1 parent 9922df8 commit c65cfa2
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 73 deletions.
4 changes: 2 additions & 2 deletions Engine/gapi/abstractgraphicsapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,12 +436,12 @@ namespace Tempest {
virtual void set (size_t id,AbstractGraphicsApi::Buffer* buf, size_t offset)=0;
virtual void setTlas(size_t,AbstractGraphicsApi::AccelerationStructure*) {}
virtual void set (size_t id, AbstractGraphicsApi::Texture** tex, size_t cnt, const Sampler2d& smp);
virtual void ssboBarriers(Detail::ResourceState& res) = 0;
virtual void ssboBarriers(Detail::ResourceState& res, PipelineStage st) = 0;
};
struct EmptyDesc : Desc {
void set(size_t,AbstractGraphicsApi::Texture*, const Sampler2d&, uint32_t){}
void set(size_t,AbstractGraphicsApi::Buffer*, size_t){}
void ssboBarriers(Detail::ResourceState&){}
void ssboBarriers(Detail::ResourceState&,PipelineStage){}
};
struct BarrierDesc {
Buffer* buffer = nullptr;
Expand Down
10 changes: 4 additions & 6 deletions Engine/gapi/directx12/dxcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ static D3D12_RESOURCE_STATES nativeFormat(ResourceAccess f) {
if((f&ResourceAccess::Uniform)==ResourceAccess::Uniform)
st |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER;

if((f&ResourceAccess::UavRead)==ResourceAccess::UavRead)
st |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
if((f&ResourceAccess::UavWrite)==ResourceAccess::UavWrite)
if((f&ResourceAccess::UavReadWriteAll)==ResourceAccess::None)
st |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

return D3D12_RESOURCE_STATES(st);
Expand Down Expand Up @@ -439,7 +437,7 @@ void DxCommandBuffer::setUniforms(AbstractGraphicsApi::CompPipeline& /*p*/, Abst
}

void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
curUniforms->ssboBarriers(resState);
curUniforms->ssboBarriers(resState,PipelineStage::S_Compute);
resState.flush(*this);
impl->Dispatch(UINT(x),UINT(y),UINT(z));
}
Expand All @@ -459,7 +457,7 @@ void DxCommandBuffer::setBytes(AbstractGraphicsApi::Pipeline& p, const void* dat
}

void DxCommandBuffer::setUniforms(AbstractGraphicsApi::Pipeline& /*p*/, AbstractGraphicsApi::Desc& u) {
u.ssboBarriers(resState);
u.ssboBarriers(resState,PipelineStage::S_Graphics);
implSetUniforms(u,false);
}

Expand Down Expand Up @@ -535,7 +533,7 @@ void DxCommandBuffer::copy(AbstractGraphicsApi::Buffer& dstBuf, size_t offset,
const UINT pitch = ((pitchBase+D3D12_TEXTURE_DATA_PITCH_ALIGNMENT-1)/D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)*D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

if(pitch==pitchBase && (offset%D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT)==0) {
resState.onUavUsage(dst.nonUniqId,0);
resState.onUavUsage(dst.nonUniqId,0,PipelineStage::S_Tranfer);
resState.setLayout(src,ResourceAccess::TransferSrc);
resState.flush(*this);

Expand Down
4 changes: 2 additions & 2 deletions Engine/gapi/directx12/dxdescriptorarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ void DxDescriptorArray::setTlas(size_t id, AbstractGraphicsApi::AccelerationStru
device.CreateShaderResourceView(nullptr,&desc,gpu);
}

void DxDescriptorArray::ssboBarriers(ResourceState& res) {
void DxDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) {
auto& lay = this->lay.handler->lay;
if(T_UNLIKELY(uavUsage.durty)) {
uavUsage.read = 0;
Expand All @@ -216,7 +216,7 @@ void DxDescriptorArray::ssboBarriers(ResourceState& res) {
}
uavUsage.durty = false;
}
res.onUavUsage(uavUsage);
res.onUavUsage(uavUsage,st);
}

#endif
2 changes: 1 addition & 1 deletion Engine/gapi/directx12/dxdescriptorarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc {
void set (size_t id, AbstractGraphicsApi::Texture *tex, const Sampler2d& smp, uint32_t mipLevel) override;
void set (size_t id, AbstractGraphicsApi::Buffer* buf, size_t offset) override;
void setTlas(size_t id, AbstractGraphicsApi::AccelerationStructure* tlas) override;
void ssboBarriers(Detail::ResourceState& res) override;
void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override;

DSharedPtr<DxPipelineLay*> lay;
DxPipelineLay::PoolAllocation val;
Expand Down
4 changes: 2 additions & 2 deletions Engine/gapi/directx12/dxshader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ static const char* target(spv::ExecutionModel exec, uint32_t sm, char* buf) {
}

static int calcShaderModel(const spirv_cross::CompilerHLSL& comp) {
uint32_t shader_model = 50;
//uint32_t shader_model = 65;
//uint32_t shader_model = 50;
uint32_t shader_model = 65;
for(auto& cap:comp.get_declared_capabilities()) {
switch(cap) {
case spv::CapabilityRayQueryKHR:
Expand Down
2 changes: 1 addition & 1 deletion Engine/gapi/directx12api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ void DirectX12Api::readPixels(Device* d, Pixmap& out, const PTexture t,
uint32_t row = bsz.w*uint32_t(bpb);
const uint32_t pith = ((row+D3D12_TEXTURE_DATA_PITCH_ALIGNMENT-1)/D3D12_TEXTURE_DATA_PITCH_ALIGNMENT)*D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
Detail::DxBuffer stage = dx.allocator.alloc(nullptr,bsz.h,bsz.w*bpb,pith,MemUsage::TransferDst,BufferHeap::Readback);
ResourceAccess defLay = storageImg ? ResourceAccess::UavRead : ResourceAccess::Sampler;
ResourceAccess defLay = storageImg ? (ResourceAccess::UavReadGr | ResourceAccess::UavReadComp) : ResourceAccess::Sampler;

auto cmd = dx.dataMgr().get();
cmd->begin();
Expand Down
21 changes: 18 additions & 3 deletions Engine/gapi/flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,17 @@ enum ResourceAccess : uint32_t {
Index = 1 << 7,
Vertex = 1 << 8,
Uniform = 1 << 9,
UavRead = 1 << 10,
UavWrite = 1 << 11,
UavReadWrite = (UavRead | UavWrite),

UavReadComp = 1 << 10,
UavWriteComp = 1 << 11,

UavReadGr = 1 << 12,
UavWriteGr = 1 << 13,

// for debug view
UavReadWriteComp = (UavReadComp | UavWriteComp),
UavReadWriteGr = (UavReadGr | UavWriteGr ),
UavReadWriteAll = (UavReadWriteGr | UavReadWriteComp),
};

inline ResourceAccess operator | (ResourceAccess a,const ResourceAccess& b) {
Expand All @@ -59,4 +67,11 @@ inline ResourceAccess operator & (ResourceAccess a,const ResourceAccess& b) {
return ResourceAccess(uint32_t(a)&uint32_t(b));
}

// Pipeline stage that a UAV (storage resource) access is attributed to.
//
// Values are dense and start at zero, so an enumerator can be used directly
// as an index into per-stage arrays sized by S_Count and the stages can be
// walked with an incrementing loop.
enum PipelineStage : uint8_t {
  S_Transfer = 0,          // transfer/copy commands
  S_Tranfer  = S_Transfer, // original (misspelled) name; kept so existing call sites still compile
  S_Compute,               // compute dispatches
  S_Graphics,              // graphics pipelines
  S_Count,                 // number of stages above; not a stage itself
  };

}
74 changes: 41 additions & 33 deletions Engine/gapi/resourcestate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,38 +56,45 @@ void ResourceState::forceLayout(AbstractGraphicsApi::Texture& img) {
}
}

void ResourceState::onUavUsage(uint64_t read, uint64_t write) {
void ResourceState::onUavUsage(uint32_t read, uint32_t write, PipelineStage st) {
ResourceState::Usage uavUsage;
uavUsage.read = read;
uavUsage.write = write;
onUavUsage(uavUsage);
onUavUsage(uavUsage,st);
}

void ResourceState::onUavUsage(const Usage& u) {
if((uavUsage.write & u.read) !=0 ||
(uavUsage.write & u.write)!=0 ){
// RaW, WaW barrier
needUavRBarrier = true;
needUavWBarrier = true;
uavUsage = u;
}
else if((uavUsage.read & u.write)!=0) {
// WaR barrier
needUavRBarrier = true;
uavUsage = u;
}
else {
uavUsage.read |= u.read;
uavUsage.write |= u.write;
void ResourceState::onUavUsage(const Usage& u, PipelineStage st) {
ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::None,ResourceAccess::UavReadComp, ResourceAccess::UavReadGr};
ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::None,ResourceAccess::UavWriteComp,ResourceAccess::UavWriteGr};

for(PipelineStage p = PipelineStage::S_Tranfer; p<PipelineStage::S_Count; p = PipelineStage(p+1)) {
auto& usagePrev = uavUsage[p];
if((usagePrev.write & u.write)!=0 ||
(usagePrev.write & u.read) !=0) {
// WaW, RaW barrier
uavPrev = uavPrev | rd[p];
uavPrev = uavPrev | wr[p];
uavUsage[p] = Usage();
}
else if((usagePrev.read & u.write)!=0) {
// WaR barrier
uavPrev = uavPrev | rd[p];
uavUsage[p].read = 0;
}
}
uavUsage[st].read |= u.read;
uavUsage[st].write |= u.write;
}

void ResourceState::joinCompute(AbstractGraphicsApi::CommandBuffer& cmd) {
if(/*uavUsage.read!=0 ||*/ uavUsage.write!=0) {
auto& usage = uavUsage[PipelineStage::S_Compute];
if(/*uavUsage.read!=0 ||*/ usage.write!=0) {
// NOTE: VS/FS side effects will require WaR barrier
needUavRBarrier = (uavUsage.read !=0);
needUavWBarrier = (uavUsage.write!=0);
uavUsage = ResourceState::Usage();
if(usage.read !=0)
uavPrev = uavPrev | ResourceAccess::UavReadComp;
if(usage.write !=0)
uavPrev = uavPrev | ResourceAccess::UavWriteComp;
usage = ResourceState::Usage();
}
}

Expand Down Expand Up @@ -116,23 +123,25 @@ void ResourceState::flush(AbstractGraphicsApi::CommandBuffer& cmd) {
}
}

if(needUavRBarrier || needUavWBarrier) {
if(uavPrev!=ResourceAccess::None) {
auto& b = barrier[barrierCnt];
b.buffer = nullptr;
if(needUavWBarrier)
b.prev = ResourceAccess::UavReadWrite; else
b.prev = ResourceAccess::UavRead;
b.next = ResourceAccess::UavReadWrite;
b.prev = uavPrev;
b.next = ResourceAccess::UavReadWriteAll; //TODO
++barrierCnt;
needUavRBarrier = false;
needUavWBarrier = false;
uavPrev = ResourceAccess::None;
}
emitBarriers(cmd,barrier,barrierCnt);
}

void ResourceState::finalize(AbstractGraphicsApi::CommandBuffer& cmd) {
if(imgState.size()==0 && needUavRBarrier==false && needUavWBarrier==false)
for(auto& i:uavUsage)
if(i.write!=0)
uavPrev = ResourceAccess::UavReadWriteAll;

if(imgState.size()==0 && uavPrev==ResourceAccess::None)
return; // early-out

for(auto& i:imgState) {
if(i.sw==nullptr)
continue;
Expand All @@ -142,9 +151,8 @@ void ResourceState::finalize(AbstractGraphicsApi::CommandBuffer& cmd) {
flush(cmd);
imgState.reserve(imgState.size());
imgState.clear();
needUavRBarrier = false;
needUavWBarrier = false;
uavUsage = ResourceState::Usage();
for(auto& i:uavUsage)
i = ResourceState::Usage();
}

ResourceState::ImgState& ResourceState::findImg(AbstractGraphicsApi::Texture* img, AbstractGraphicsApi::Swapchain* sw, uint32_t id,
Expand Down
14 changes: 7 additions & 7 deletions Engine/gapi/resourcestate.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class ResourceState {
ResourceState() = default;

struct Usage {
uint64_t read = 0;
uint64_t write = 0;
uint32_t read = 0;
uint32_t write = 0;
bool durty = false;
};

Expand All @@ -26,8 +26,8 @@ class ResourceState {
void setLayout (AbstractGraphicsApi::Swapchain& s, uint32_t id, ResourceAccess lay, bool discard);
void setLayout (AbstractGraphicsApi::Texture& a, ResourceAccess lay, bool discard = false);

void onUavUsage (uint64_t read, uint64_t write);
void onUavUsage (const ResourceState::Usage& uavUsage);
void onUavUsage (uint32_t read, uint32_t write, PipelineStage st);
void onUavUsage (const ResourceState::Usage& uavUsage, PipelineStage st);
void forceLayout(AbstractGraphicsApi::Texture& a);

void joinCompute(AbstractGraphicsApi::CommandBuffer& cmd);
Expand All @@ -51,9 +51,9 @@ class ResourceState {
void emitBarriers(AbstractGraphicsApi::CommandBuffer& cmd, AbstractGraphicsApi::BarrierDesc* desc, size_t cnt);

std::vector<ImgState> imgState;
ResourceState::Usage uavUsage;
bool needUavRBarrier = false;
bool needUavWBarrier = false;

ResourceState::Usage uavUsage[PipelineStage::S_Count] = {};
ResourceAccess uavPrev = ResourceAccess::None;
};

}
Expand Down
33 changes: 21 additions & 12 deletions Engine/gapi/vulkan/vcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,22 @@ static void toStage(VkPipelineStageFlags2KHR& stage, VkAccessFlagBits2KHR& acces
acc |= VK_ACCESS_UNIFORM_READ_BIT;
}

if((rs&ResourceAccess::UavRead)==ResourceAccess::UavRead) {
ret |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
// memory barriers
if((rs&ResourceAccess::UavReadGr)==ResourceAccess::UavReadGr) {
ret |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
acc |= VK_ACCESS_SHADER_READ_BIT;
}
if((rs&ResourceAccess::UavWrite)==ResourceAccess::UavWrite) {
ret |= VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
if((rs&ResourceAccess::UavWriteGr)==ResourceAccess::UavWriteGr) {
ret |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
acc |= VK_ACCESS_SHADER_WRITE_BIT;
}

if((rs&ResourceAccess::UavReadComp)==ResourceAccess::UavReadComp) {
ret |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
acc |= VK_ACCESS_SHADER_READ_BIT;
}
if((rs&ResourceAccess::UavWriteComp)==ResourceAccess::UavWriteComp) {
ret |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
acc |= VK_ACCESS_SHADER_WRITE_BIT;
}

Expand Down Expand Up @@ -132,9 +142,7 @@ static VkImageLayout toLayout(ResourceAccess rs) {
if((rs&ResourceAccess::Vertex)==ResourceAccess::Uniform)
return VK_IMAGE_LAYOUT_GENERAL;

if((rs&ResourceAccess::UavRead)==ResourceAccess::UavRead)
return VK_IMAGE_LAYOUT_GENERAL;
if((rs&ResourceAccess::UavWrite)==ResourceAccess::UavWrite)
if((rs&ResourceAccess::UavReadWriteAll)!=ResourceAccess::None)
return VK_IMAGE_LAYOUT_GENERAL;

return VK_IMAGE_LAYOUT_UNDEFINED;
Expand Down Expand Up @@ -355,7 +363,7 @@ void VCommandBuffer::setUniforms(AbstractGraphicsApi::Pipeline &p, AbstractGraph
VPipeline& px=reinterpret_cast<VPipeline&>(p);
VDescriptorArray& ux=reinterpret_cast<VDescriptorArray&>(u);
curUniforms = &ux;
curUniforms->ssboBarriers(resState);
curUniforms->ssboBarriers(resState,PipelineStage::S_Graphics);
vkCmdBindDescriptorSets(impl,VK_PIPELINE_BIND_POINT_GRAPHICS,
px.pipelineLayout,0,
1,&ux.desc,
Expand All @@ -369,7 +377,7 @@ void VCommandBuffer::setComputePipeline(AbstractGraphicsApi::CompPipeline& p) {
}

void VCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
curUniforms->ssboBarriers(resState);
curUniforms->ssboBarriers(resState,PipelineStage::S_Compute);
resState.flush(*this);
vkCmdDispatch(impl,uint32_t(x),uint32_t(y),uint32_t(z));
}
Expand Down Expand Up @@ -667,7 +675,7 @@ void VCommandBuffer::buildTlas(VkAccelerationStructureKHR dest,
void VCommandBuffer::copy(AbstractGraphicsApi::Buffer& dst, size_t offset,
AbstractGraphicsApi::Texture& src, uint32_t width, uint32_t height, uint32_t mip) {
auto& nDst = reinterpret_cast<VBuffer&>(dst);
resState.onUavUsage(nDst.nonUniqId,0);
resState.onUavUsage(nDst.nonUniqId,0,PipelineStage::S_Tranfer);
resState.setLayout(src,ResourceAccess::TransferSrc);
resState.flush(*this);

Expand Down Expand Up @@ -776,9 +784,10 @@ void VCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_
info.bufferMemoryBarrierCount = bufCount;
info.pImageMemoryBarriers = imgBarrier;
info.imageMemoryBarrierCount = imgCount;
info.pMemoryBarriers = &memBarrier;
if(memBarrier.sType==VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR)
if(memBarrier.sType==VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR) {
info.pMemoryBarriers = &memBarrier;
info.memoryBarrierCount++;
}

vkCmdPipelineBarrier2(impl,&info);
}
Expand Down
4 changes: 2 additions & 2 deletions Engine/gapi/vulkan/vdescriptorarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void VDescriptorArray::set(size_t id, AbstractGraphicsApi::Texture** t, size_t c
vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr);
}

void VDescriptorArray::ssboBarriers(ResourceState& res) {
void VDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) {
auto& lay = this->lay.handler->lay;
if(T_UNLIKELY(uavUsage.durty)) {
uavUsage.read = 0;
Expand All @@ -248,7 +248,7 @@ void VDescriptorArray::ssboBarriers(ResourceState& res) {
}
uavUsage.durty = false;
}
res.onUavUsage(uavUsage);
res.onUavUsage(uavUsage,st);
}

void VDescriptorArray::addPoolSize(VkDescriptorPoolSize *p, size_t &sz, uint32_t cnt, VkDescriptorType elt) {
Expand Down
2 changes: 1 addition & 1 deletion Engine/gapi/vulkan/vdescriptorarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class VDescriptorArray : public AbstractGraphicsApi::Desc {

void set (size_t id, AbstractGraphicsApi::Texture** tex, size_t cnt, const Sampler2d& smp) override;

void ssboBarriers(Detail::ResourceState& res) override;
void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override;

VkDescriptorSet desc=VK_NULL_HANDLE;

Expand Down
2 changes: 1 addition & 1 deletion Engine/gapi/vulkanapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ void VulkanApi::readPixels(AbstractGraphicsApi::Device *d, Pixmap& out, const PT

const size_t size = bsz.w*bsz.h*bpb;
Detail::VBuffer stage = dx.allocator.alloc(nullptr,size,1,1,MemUsage::TransferDst,BufferHeap::Readback);
ResourceAccess defLay = storageImg ? ResourceAccess::UavRead : ResourceAccess::Sampler;
ResourceAccess defLay = storageImg ? (ResourceAccess::UavReadGr | ResourceAccess::UavReadComp) : ResourceAccess::Sampler;

auto cmd = dx.dataMgr().get();
cmd->begin();
Expand Down

0 comments on commit c65cfa2

Please sign in to comment.