Skip to content

Commit

Permalink
cleanup DX12 descriptor management
Browse files Browse the repository at this point in the history
  • Loading branch information
Try committed Mar 29, 2023
1 parent 2a9ee96 commit 116c18d
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 102 deletions.
37 changes: 2 additions & 35 deletions Engine/gapi/directx12/dxcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,8 +303,7 @@ void DxCommandBuffer::end() {
dxAssert(impl->Close());
state = NoRecording;
resetDone = false;
for(size_t i=0; i<DxPipelineLay::HEAP_MAX; ++i)
currentHeaps[i] = nullptr;
curHeaps = DxDescriptorArray::CbState{};
}

void DxCommandBuffer::reset() {
Expand Down Expand Up @@ -467,39 +466,7 @@ void DxCommandBuffer::setUniforms(AbstractGraphicsApi::Pipeline& /*p*/, Abstract
void DxCommandBuffer::implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompute) {
DxDescriptorArray& ux = reinterpret_cast<DxDescriptorArray&>(u);
curUniforms = &ux;
ux.bind(*impl, currentHeaps, isCompute);

/*
auto& lx = *ux.lay.handler;
if(lx.isRuntimeSized()) {
ux.bind(*impl, currentHeaps, isCompute);
return;
}
bool setH = false;
for(size_t i=0; i<DxPipelineLay::HEAP_MAX; ++i) {
if(ux.val.heap[i]!=currentHeaps[i]) {
setH = true;
break;
}
}
if(setH) {
for(size_t i=0; i<DxPipelineLay::HEAP_MAX; ++i)
currentHeaps[i] = ux.val.heap[i];
// NOTE: pDescriptorHeaps[i] must not be NULL
impl->SetDescriptorHeaps(ux.heapCnt, currentHeaps);
}
for(size_t i=0;i<lx.roots.size();++i) {
auto& r = lx.roots[i];
auto desc = ux.val.gpu[r.heap];
desc.ptr += r.heapOffset;
if(isCompute)
impl->SetComputeRootDescriptorTable (UINT(i), desc); else
impl->SetGraphicsRootDescriptorTable(UINT(i), desc);
}
*/
ux.bind(*impl, curHeaps, isCompute);
}

void DxCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) {
Expand Down
3 changes: 2 additions & 1 deletion Engine/gapi/directx12/dxcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "comptr.h"
#include "gapi/resourcestate.h"
#include "dxdescriptorarray.h"
#include "dxfbolayout.h"
#include "dxpipelinelay.h"

Expand Down Expand Up @@ -91,8 +92,8 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer {

DxFboLayout fboLayout;

ID3D12DescriptorHeap* currentHeaps[DxPipelineLay::HEAP_MAX] = {};
AbstractGraphicsApi::Desc* curUniforms = nullptr;
DxDescriptorArray::CbState curHeaps;

uint32_t pushBaseInstanceId = -1;

Expand Down
20 changes: 13 additions & 7 deletions Engine/gapi/directx12/dxdescriptorallocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,34 +51,40 @@ DxDescriptorAllocator::DxDescriptorAllocator() {
}

void DxDescriptorAllocator::setDevice(DxDevice& device) {
provider.device = &device;
providerRes.device = &device;
providerSmp.device = &device;

auto& dx = *device.device.get();
descSize = dx.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
smpSize = dx.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

allocator.setDefaultPageSize(2048);
allocatorRes.setDefaultPageSize(65535); // 1'000'000 is allowed to preallocate, but 65k is fine
allocatorSmp.setDefaultPageSize( 2048);
}

DxDescriptorAllocator::Allocation DxDescriptorAllocator::alloc(size_t count, bool smp) {
if(count==0)
return Allocation();
uint32_t id = (smp ? 1 : 0);
uint32_t id = (smp ? 1 : 0);
auto& allocator = (smp ? allocatorSmp : allocatorRes);
auto ret = allocator.alloc(count, 1, id, id, false);
return ret;
}

void DxDescriptorAllocator::free(Allocation& page) {
if(page.page!=nullptr)
if(page.page!=nullptr) {
bool smp = (page.page->heapId==1);
auto& allocator = (smp ? allocatorSmp : allocatorRes);
allocator.free(page);
}
}

// Returns the descriptor heap stored in the page of allocation `a`,
// or nullptr when the allocation has no page attached.
ID3D12DescriptorHeap* DxDescriptorAllocator::heapof(const Allocation& a) {
  if(a.page==nullptr)
    return nullptr;
  return a.page->memory;
  }

D3D12_CPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::handle(const Allocation& a) {
D3D12_CPU_DESCRIPTOR_HANDLE ptr = D3D12_CPU_DESCRIPTOR_HANDLE();
D3D12_CPU_DESCRIPTOR_HANDLE ptr = {};
if(a.page==nullptr)
return ptr;

Expand All @@ -92,8 +98,8 @@ D3D12_CPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::handle(const Allocation& a) {
return ptr;
}

D3D12_GPU_DESCRIPTOR_HANDLE Tempest::Detail::DxDescriptorAllocator::gpuHandle(const Allocation& a) {
D3D12_GPU_DESCRIPTOR_HANDLE ptr = D3D12_GPU_DESCRIPTOR_HANDLE();
D3D12_GPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::gpuHandle(const Allocation& a) {
D3D12_GPU_DESCRIPTOR_HANDLE ptr = {};
if(a.page==nullptr)
return ptr;

Expand Down
9 changes: 6 additions & 3 deletions Engine/gapi/directx12/dxdescriptorallocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class DxDescriptorAllocator {
DeviceMemory alloc(size_t size, uint32_t typeId);
void free(DeviceMemory m, size_t size, uint32_t typeId);
};
void setDevice(DxDevice& device);
void setDevice(DxDevice& device);

using Allocation=typename Tempest::Detail::DeviceAllocator<Provider>::Allocation;

Expand All @@ -38,8 +38,11 @@ class DxDescriptorAllocator {
D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle(const Allocation& a);

private:
Provider provider;
Detail::DeviceAllocator<Provider> allocator{provider};
Provider providerRes;
Detail::DeviceAllocator<Provider> allocatorRes{providerRes};

Provider providerSmp;
Detail::DeviceAllocator<Provider> allocatorSmp{providerSmp};

uint32_t descSize = 1;
uint32_t smpSize = 1;
Expand Down
80 changes: 37 additions & 43 deletions Engine/gapi/directx12/dxdescriptorarray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,13 @@ DxDescriptorArray::DxDescriptorArray(DxPipelineLay& vlay)
reallocSet(0, 0);
} else {
auto* h = lay.handler->heaps;
heap[HEAP_RES] = allocator.alloc(h[HEAP_RES].numDesc,false);
heap[HEAP_SMP] = allocator.alloc(h[HEAP_SMP].numDesc,true);

size_t len[HEAP_MAX] = {h[HEAP_RES].numDesc, h[HEAP_SMP].numDesc};
for(auto& l:len)
l = ((l+ALLOC_GRANULARITY-1u) & (~(ALLOC_GRANULARITY-1u)));

heap[HEAP_RES] = allocator.alloc(len[HEAP_RES],false);
heap[HEAP_SMP] = allocator.alloc(len[HEAP_SMP],true);

if((heap[HEAP_RES].page==nullptr && h[HEAP_RES].numDesc>0) ||
(heap[HEAP_SMP].page==nullptr && h[HEAP_SMP].numDesc>0)) {
Expand Down Expand Up @@ -171,14 +176,12 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Texture** tex, size_
auto& l = lay.handler->lay[id];

if(l.runtimeSized) {
constexpr uint32_t granularity = DxPipelineLay::BINDLESS_GRANULARITY;
uint32_t rSz = ((cnt+granularity-1u) & (~(granularity-1u)));
if(rSz!=runtimeArrays[id].size) {
if(cnt!=runtimeArrays[id].size) {
auto prev = std::move(runtimeArrays[id].data);
runtimeArrays[id].data.assign(tex, tex+cnt);
try {
reallocSet(id, rSz);
runtimeArrays[id].size = rSz;
reallocSet(id, cnt);
runtimeArrays[id].size = cnt;
runtimeArrays[id].mipLevel = mipLevel;
runtimeArrays[id].smp = smp;
}
Expand Down Expand Up @@ -232,14 +235,12 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Buffer** b, size_t c
auto& l = lay.handler->lay[id];

if(l.runtimeSized) {
constexpr uint32_t granularity = 1; //DxPipelineLay::MAX_BINDLESS;
uint32_t rSz = ((cnt+granularity-1u) & (~(granularity-1u)));
if(rSz!=runtimeArrays[id].size) {
if(cnt!=runtimeArrays[id].size) {
auto prev = std::move(runtimeArrays[id].data);
runtimeArrays[id].data.assign(b, b+cnt);
try {
reallocSet(id, rSz);
runtimeArrays[id].size = rSz;
reallocSet(id, cnt);
runtimeArrays[id].size = cnt;
runtimeArrays[id].offset = 0;
}
catch(...) {
Expand All @@ -265,7 +266,7 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Buffer** b, size_t c
for(size_t i=0; i<cnt; ++i) {
if(b[i]==nullptr)
continue;
auto& buf = *reinterpret_cast<DxBuffer*>(b[i]);
auto& buf = *reinterpret_cast<DxBuffer*>(b[i]);
placeInHeap(device, prm.rgnType, descPtr, heapOffset + i*descSize, buf, 0, lay.handler->lay[id].byteSize);
}
}
Expand All @@ -291,7 +292,7 @@ void DxDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) {
res.onUavUsage(uavUsage,st);
}

void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHeap** currentHeaps, bool isCompute) {
void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, CbState& state, bool isCompute) {
auto& allocator = lay.handler->dev.descAlloc;
auto& lx = *lay.handler;

Expand All @@ -302,10 +303,9 @@ void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHe
heaps[HEAP_RES] = allocator.heapof(heap[HEAP_RES]);
heaps[HEAP_SMP] = allocator.heapof(heap[HEAP_SMP]);

if(currentHeaps[HEAP_RES]!=heaps[HEAP_RES] || currentHeaps[HEAP_SMP]!=heaps[HEAP_SMP]) {
// TODO: single heap case
currentHeaps[HEAP_RES] = heaps[HEAP_RES];
currentHeaps[HEAP_SMP] = heaps[HEAP_SMP];
if(state.heaps[HEAP_RES]!=heaps[HEAP_RES] || state.heaps[HEAP_SMP]!=heaps[HEAP_SMP]) {
state.heaps[HEAP_RES] = heaps[HEAP_RES];
state.heaps[HEAP_SMP] = heaps[HEAP_SMP];

const uint8_t cnt = (heaps[HEAP_SMP]==nullptr ? 1 : 2);
if(heaps[HEAP_RES]==nullptr)
Expand Down Expand Up @@ -348,23 +348,21 @@ void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHe
}
}

void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) {
void DxDescriptorArray::reallocSet(size_t id, size_t newRuntimeSz) {
auto& device = *lay.handler->dev.device;
auto& allocator = lay.handler->dev.descAlloc;

const uint32_t descSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
const uint32_t smpSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

const size_t heapOffset = lay.handler->heaps[0].numDesc;
const size_t heapOffsetSmp = lay.handler->heaps[1].numDesc;
const size_t heapOffset = lay.handler->heaps[HEAP_RES].numDesc;
const size_t heapOffsetSmp = lay.handler->heaps[HEAP_SMP].numDesc;

size_t lenOld[HEAP_MAX] = {heapOffset, heapOffsetSmp};
size_t len [HEAP_MAX] = {heapOffset, heapOffsetSmp};
size_t len[HEAP_MAX] = {heapOffset, heapOffsetSmp};
for(size_t i=0; i<runtimeArrays.size(); ++i) {
auto& l = lay.handler->lay[i];
auto& prm = lay.handler->prm[i];
auto size = (i==id ? newRuntimeSz : runtimeArrays[i].size);
auto sizeOld = runtimeArrays[i].size;

if(l.runtimeSized) {
runtimeArrays[i].heapOffset = len[HEAP_RES]*descSize;
Expand All @@ -374,26 +372,24 @@ void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) {
runtimeArrays[i].heapOffsetSmp = prm.heapOffsetSmp;
}

if(l.cls!=ShaderReflection::Sampler) {
len [HEAP_RES] += size;
lenOld[HEAP_RES] += sizeOld;
}

if(l.hasSampler()) {
len [HEAP_SMP] += size;
lenOld[HEAP_SMP] += sizeOld;
}
if(l.cls!=ShaderReflection::Sampler)
len[HEAP_RES] += size;
if(l.hasSampler())
len[HEAP_SMP] += size;
}

for(auto& lx:len)
lx = ((lx+ALLOC_GRANULARITY-1u) & (~(ALLOC_GRANULARITY-1u)));

Allocation heapDesc, heapSmp;
try {
if((len[0]!=lenOld[HEAP_RES] || heap[HEAP_RES].size==0) && len[HEAP_RES]>0) {
heapDesc = allocator.alloc(len[0], false);
if(len[HEAP_RES]!=heap[HEAP_RES].size) {
heapDesc = allocator.alloc(len[HEAP_RES], false);
if(heapDesc.page==nullptr)
throw std::bad_alloc();
}
if((len[1]!=lenOld[1] || heap[1].size==0) && len[1]>0) {
heapSmp = allocator.alloc(len[1], true);
if(len[HEAP_SMP]!=heap[HEAP_SMP].size) {
heapSmp = allocator.alloc(len[HEAP_SMP], true);
if(heapSmp.page==nullptr)
throw std::bad_alloc();
}
Expand All @@ -405,9 +401,9 @@ void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) {
}

if(heapDesc.size>0 || len[HEAP_RES]==0)
std::swap(heap[0], heapDesc);
std::swap(heap[HEAP_RES], heapDesc);
if(heapSmp .size>0 || len[HEAP_SMP]==0)
std::swap(heap[1], heapSmp);
std::swap(heap[HEAP_SMP], heapSmp);

allocator.free(heapDesc);
allocator.free(heapSmp);
Expand All @@ -420,8 +416,8 @@ void DxDescriptorArray::reflushSet() {
uint32_t descSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
uint32_t smpSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);

auto descPtr = allocator.handle(heap[0]);
auto smpPtr = allocator.handle(heap[1]);
auto descPtr = allocator.handle(heap[HEAP_RES]);
auto smpPtr = allocator.handle(heap[HEAP_SMP]);

for(size_t id=0; id<lay.handler->lay.size(); ++id) {
auto& prm = lay.handler->prm[id];
Expand Down Expand Up @@ -495,8 +491,6 @@ void DxDescriptorArray::placeInHeap(ID3D12Device& device, D3D12_DESCRIPTOR_RANGE
desc.Texture2D.MipSlice = mipLevel;
}

// auto gpu = val.cpu[prm.heapId];
// gpu.ptr += (prm.heapOffset + i*descSize);
auto gpu = at;
gpu.ptr += heapOffset;
device.CreateUnorderedAccessView(t.impl.get(),nullptr,&desc,gpu);
Expand Down
20 changes: 13 additions & 7 deletions Engine/gapi/directx12/dxdescriptorarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc {
DxDescriptorArray(DxDescriptorArray&& other);
~DxDescriptorArray();

// Heap slot indices mirrored from DxPipelineLay, plus the allocation rounding step.
enum {
// Slot of the allocation requested with smp=false (resource descriptors).
HEAP_RES = DxPipelineLay::HEAP_RES,
// Slot of the allocation requested with smp=true (sampler descriptors).
HEAP_SMP = DxPipelineLay::HEAP_SMP,
// Number of heap slots; used as the length of per-heap arrays.
HEAP_MAX = DxPipelineLay::HEAP_MAX,
// Heap lengths are rounded up to a multiple of this value before being allocated.
// The rounding uses a bit mask, so this must stay a power of two.
ALLOC_GRANULARITY = 4,
};

// Binding state owned by a command buffer and passed to bind():
// the descriptor heaps most recently recorded by bind(), indexed by HEAP_RES/HEAP_SMP.
// All entries start out as nullptr.
struct CbState {
ID3D12DescriptorHeap* heaps[HEAP_MAX] = {};
};

void set (size_t id, AbstractGraphicsApi::Texture *tex, const Sampler& smp, uint32_t mipLevel) override;
void set (size_t id, AbstractGraphicsApi::Buffer* buf, size_t offset) override;
void set (size_t id, const Sampler& smp) override;
Expand All @@ -29,19 +40,14 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc {

void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override;

void bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHeap** currentHeaps, bool isCompute);
void bind(ID3D12GraphicsCommandList6& enc, CbState& state, bool isCompute);

DSharedPtr<DxPipelineLay*> lay;

private:
enum {
HEAP_RES = DxPipelineLay::HEAP_RES,
HEAP_SMP = DxPipelineLay::HEAP_SMP,
HEAP_MAX = DxPipelineLay::HEAP_MAX,
};
using Allocation = DxDescriptorAllocator::Allocation;

void reallocSet(size_t id, uint32_t newRuntimeSz);
void reallocSet(size_t id, size_t newRuntimeSz);
void reflushSet();

void placeInHeap(ID3D12Device& device, D3D12_DESCRIPTOR_RANGE_TYPE rgn, const D3D12_CPU_DESCRIPTOR_HANDLE& at,
Expand Down
Loading

0 comments on commit 116c18d

Please sign in to comment.