diff --git a/Engine/gapi/directx12/dxcommandbuffer.cpp b/Engine/gapi/directx12/dxcommandbuffer.cpp index 8ee8a18a..e51cb84d 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.cpp +++ b/Engine/gapi/directx12/dxcommandbuffer.cpp @@ -303,8 +303,7 @@ void DxCommandBuffer::end() { dxAssert(impl->Close()); state = NoRecording; resetDone = false; - for(size_t i=0; i(u); curUniforms = &ux; - ux.bind(*impl, currentHeaps, isCompute); - - /* - auto& lx = *ux.lay.handler; - if(lx.isRuntimeSized()) { - ux.bind(*impl, currentHeaps, isCompute); - return; - } - - bool setH = false; - for(size_t i=0; iSetDescriptorHeaps(ux.heapCnt, currentHeaps); - } - - for(size_t i=0;iSetComputeRootDescriptorTable (UINT(i), desc); else - impl->SetGraphicsRootDescriptorTable(UINT(i), desc); - } - */ + ux.bind(*impl, curHeaps, isCompute); } void DxCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) { diff --git a/Engine/gapi/directx12/dxcommandbuffer.h b/Engine/gapi/directx12/dxcommandbuffer.h index 664b8ec7..392cf2bb 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.h +++ b/Engine/gapi/directx12/dxcommandbuffer.h @@ -6,6 +6,7 @@ #include "comptr.h" #include "gapi/resourcestate.h" +#include "dxdescriptorarray.h" #include "dxfbolayout.h" #include "dxpipelinelay.h" @@ -91,8 +92,8 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer { DxFboLayout fboLayout; - ID3D12DescriptorHeap* currentHeaps[DxPipelineLay::HEAP_MAX] = {}; AbstractGraphicsApi::Desc* curUniforms = nullptr; + DxDescriptorArray::CbState curHeaps; uint32_t pushBaseInstanceId = -1; diff --git a/Engine/gapi/directx12/dxdescriptorallocator.cpp b/Engine/gapi/directx12/dxdescriptorallocator.cpp index 4ec9ad65..b48e1e0e 100644 --- a/Engine/gapi/directx12/dxdescriptorallocator.cpp +++ b/Engine/gapi/directx12/dxdescriptorallocator.cpp @@ -51,26 +51,32 @@ DxDescriptorAllocator::DxDescriptorAllocator() { } void DxDescriptorAllocator::setDevice(DxDevice& device) { - provider.device = &device; + providerRes.device = &device; + providerSmp.device = &device; auto& dx = *device.device.get(); descSize = dx.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); smpSize = dx.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - allocator.setDefaultPageSize(2048); + allocatorRes.setDefaultPageSize(65535); // 1'000'000 is allowed to preallocate, but 65k is fine + allocatorSmp.setDefaultPageSize( 2048); } DxDescriptorAllocator::Allocation DxDescriptorAllocator::alloc(size_t count, bool smp) { if(count==0) return Allocation(); - uint32_t id = (smp ? 1 : 0); + uint32_t id = (smp ? 1 : 0); + auto& allocator = (smp ? allocatorSmp : allocatorRes); auto ret = allocator.alloc(count, 1, id, id, false); return ret; } void DxDescriptorAllocator::free(Allocation& page) { - if(page.page!=nullptr) + if(page.page!=nullptr) { + bool smp = (page.page->heapId==1); + auto& allocator = (smp ? allocatorSmp : allocatorRes); allocator.free(page); + } } ID3D12DescriptorHeap* DxDescriptorAllocator::heapof(const Allocation& a) { @@ -78,7 +84,7 @@ ID3D12DescriptorHeap* DxDescriptorAllocator::heapof(const Allocation& a) { } D3D12_CPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::handle(const Allocation& a) { - D3D12_CPU_DESCRIPTOR_HANDLE ptr = D3D12_CPU_DESCRIPTOR_HANDLE(); + D3D12_CPU_DESCRIPTOR_HANDLE ptr = {}; if(a.page==nullptr) return ptr; @@ -92,8 +98,8 @@ D3D12_CPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::handle(const Allocation& a) { return ptr; } -D3D12_GPU_DESCRIPTOR_HANDLE Tempest::Detail::DxDescriptorAllocator::gpuHandle(const Allocation& a) { - D3D12_GPU_DESCRIPTOR_HANDLE ptr = D3D12_GPU_DESCRIPTOR_HANDLE(); +D3D12_GPU_DESCRIPTOR_HANDLE DxDescriptorAllocator::gpuHandle(const Allocation& a) { + D3D12_GPU_DESCRIPTOR_HANDLE ptr = {}; if(a.page==nullptr) return ptr; diff --git a/Engine/gapi/directx12/dxdescriptorallocator.h b/Engine/gapi/directx12/dxdescriptorallocator.h index bf048f04..ddedd977 100644 --- a/Engine/gapi/directx12/dxdescriptorallocator.h +++ b/Engine/gapi/directx12/dxdescriptorallocator.h @@ -26,7 +26,7 @@ class DxDescriptorAllocator { DeviceMemory alloc(size_t size, uint32_t typeId); void free(DeviceMemory m, size_t size, uint32_t typeId); }; - void setDevice(DxDevice& device); + void setDevice(DxDevice& device); using Allocation=typename Tempest::Detail::DeviceAllocator::Allocation; @@ -38,8 +38,11 @@ class DxDescriptorAllocator { D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle(const Allocation& a); private: - Provider provider; - Detail::DeviceAllocator allocator{provider}; + Provider providerRes; + Detail::DeviceAllocator allocatorRes{providerRes}; + + Provider providerSmp; + Detail::DeviceAllocator allocatorSmp{providerSmp}; uint32_t descSize = 1; uint32_t smpSize = 1; diff --git a/Engine/gapi/directx12/dxdescriptorarray.cpp b/Engine/gapi/directx12/dxdescriptorarray.cpp index a48d31c9..48acbcf7 100644 --- a/Engine/gapi/directx12/dxdescriptorarray.cpp +++ b/Engine/gapi/directx12/dxdescriptorarray.cpp @@ -70,8 +70,13 @@ DxDescriptorArray::DxDescriptorArray(DxPipelineLay& vlay) reallocSet(0, 0); } else { auto* h = lay.handler->heaps; - heap[HEAP_RES] = allocator.alloc(h[HEAP_RES].numDesc,false); - heap[HEAP_SMP] = allocator.alloc(h[HEAP_SMP].numDesc,true); + + size_t len[HEAP_MAX] = {h[HEAP_RES].numDesc, h[HEAP_SMP].numDesc}; + for(auto& l:len) + l = ((l+ALLOC_GRANULARITY-1u) & (~(ALLOC_GRANULARITY-1u))); + + heap[HEAP_RES] = allocator.alloc(len[HEAP_RES],false); + heap[HEAP_SMP] = allocator.alloc(len[HEAP_SMP],true); if((heap[HEAP_RES].page==nullptr && h[HEAP_RES].numDesc>0) || (heap[HEAP_SMP].page==nullptr && h[HEAP_SMP].numDesc>0)) { @@ -171,14 +176,12 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Texture** tex, size_ auto& l = lay.handler->lay[id]; if(l.runtimeSized) { - constexpr uint32_t granularity = DxPipelineLay::BINDLESS_GRANULARITY; - uint32_t rSz = ((cnt+granularity-1u) & (~(granularity-1u))); - if(rSz!=runtimeArrays[id].size) { + if(cnt!=runtimeArrays[id].size) { auto prev = std::move(runtimeArrays[id].data); runtimeArrays[id].data.assign(tex, tex+cnt); try { - reallocSet(id, rSz); - runtimeArrays[id].size = rSz; + reallocSet(id, cnt); + runtimeArrays[id].size = cnt; runtimeArrays[id].mipLevel = mipLevel; runtimeArrays[id].smp = smp; } @@ -232,14 +235,12 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Buffer** b, size_t c auto& l = lay.handler->lay[id]; if(l.runtimeSized) { - constexpr uint32_t granularity = 1; //DxPipelineLay::MAX_BINDLESS; - uint32_t rSz = ((cnt+granularity-1u) & (~(granularity-1u))); - if(rSz!=runtimeArrays[id].size) { + if(cnt!=runtimeArrays[id].size) { auto prev = std::move(runtimeArrays[id].data); runtimeArrays[id].data.assign(b, b+cnt); try { - reallocSet(id, rSz); - runtimeArrays[id].size = rSz; + reallocSet(id, cnt); + runtimeArrays[id].size = cnt; runtimeArrays[id].offset = 0; } catch(...) { @@ -265,7 +266,7 @@ void DxDescriptorArray::set(size_t id, AbstractGraphicsApi::Buffer** b, size_t c for(size_t i=0; i(b[i]); + auto& buf = *reinterpret_cast(b[i]); placeInHeap(device, prm.rgnType, descPtr, heapOffset + i*descSize, buf, 0, lay.handler->lay[id].byteSize); } } @@ -291,7 +292,7 @@ void DxDescriptorArray::ssboBarriers(ResourceState& res, PipelineStage st) { res.onUavUsage(uavUsage,st); } -void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHeap** currentHeaps, bool isCompute) { +void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, CbState& state, bool isCompute) { auto& allocator = lay.handler->dev.descAlloc; auto& lx = *lay.handler; @@ -302,10 +303,9 @@ void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHe heaps[HEAP_RES] = allocator.heapof(heap[HEAP_RES]); heaps[HEAP_SMP] = allocator.heapof(heap[HEAP_SMP]); - if(currentHeaps[HEAP_RES]!=heaps[HEAP_RES] || currentHeaps[HEAP_SMP]!=heaps[HEAP_SMP]) { - // TODO: single heap case - currentHeaps[HEAP_RES] = heaps[HEAP_RES]; - currentHeaps[HEAP_SMP] = heaps[HEAP_SMP]; + if(state.heaps[HEAP_RES]!=heaps[HEAP_RES] || state.heaps[HEAP_SMP]!=heaps[HEAP_SMP]) { + state.heaps[HEAP_RES] = heaps[HEAP_RES]; + state.heaps[HEAP_SMP] = heaps[HEAP_SMP]; const uint8_t cnt = (heaps[HEAP_SMP]==nullptr ? 1 : 2); if(heaps[HEAP_RES]==nullptr) @@ -348,23 +348,21 @@ void DxDescriptorArray::bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHe } } -void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) { +void DxDescriptorArray::reallocSet(size_t id, size_t newRuntimeSz) { auto& device = *lay.handler->dev.device; auto& allocator = lay.handler->dev.descAlloc; const uint32_t descSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); const uint32_t smpSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - const size_t heapOffset = lay.handler->heaps[0].numDesc; - const size_t heapOffsetSmp = lay.handler->heaps[1].numDesc; + const size_t heapOffset = lay.handler->heaps[HEAP_RES].numDesc; + const size_t heapOffsetSmp = lay.handler->heaps[HEAP_SMP].numDesc; - size_t lenOld[HEAP_MAX] = {heapOffset, heapOffsetSmp}; - size_t len [HEAP_MAX] = {heapOffset, heapOffsetSmp}; + size_t len[HEAP_MAX] = {heapOffset, heapOffsetSmp}; for(size_t i=0; ilay[i]; auto& prm = lay.handler->prm[i]; auto size = (i==id ? newRuntimeSz : runtimeArrays[i].size); - auto sizeOld = runtimeArrays[i].size; if(l.runtimeSized) { runtimeArrays[i].heapOffset = len[HEAP_RES]*descSize; @@ -374,26 +372,24 @@ void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) { runtimeArrays[i].heapOffsetSmp = prm.heapOffsetSmp; } - if(l.cls!=ShaderReflection::Sampler) { - len [HEAP_RES] += size; - lenOld[HEAP_RES] += sizeOld; - } - - if(l.hasSampler()) { - len [HEAP_SMP] += size; - lenOld[HEAP_SMP] += sizeOld; - } + if(l.cls!=ShaderReflection::Sampler) + len[HEAP_RES] += size; + if(l.hasSampler()) + len[HEAP_SMP] += size; } + for(auto& lx:len) + lx = ((lx+ALLOC_GRANULARITY-1u) & (~(ALLOC_GRANULARITY-1u))); + Allocation heapDesc, heapSmp; try { - if((len[0]!=lenOld[HEAP_RES] || heap[HEAP_RES].size==0) && len[HEAP_RES]>0) { - heapDesc = allocator.alloc(len[0], false); + if(len[HEAP_RES]!=heap[HEAP_RES].size) { + heapDesc = allocator.alloc(len[HEAP_RES], false); if(heapDesc.page==nullptr) throw std::bad_alloc(); } - if((len[1]!=lenOld[1] || heap[1].size==0) && len[1]>0) { - heapSmp = allocator.alloc(len[1], true); + if(len[HEAP_SMP]!=heap[HEAP_SMP].size) { + heapSmp = allocator.alloc(len[HEAP_SMP], true); if(heapSmp.page==nullptr) throw std::bad_alloc(); } @@ -405,9 +401,9 @@ void DxDescriptorArray::reallocSet(size_t id, uint32_t newRuntimeSz) { } if(heapDesc.size>0 || len[HEAP_RES]==0) - std::swap(heap[0], heapDesc); + std::swap(heap[HEAP_RES], heapDesc); if(heapSmp .size>0 || len[HEAP_SMP]==0) - std::swap(heap[1], heapSmp); + std::swap(heap[HEAP_SMP], heapSmp); allocator.free(heapDesc); allocator.free(heapSmp); @@ -420,8 +416,8 @@ void DxDescriptorArray::reflushSet() { uint32_t descSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); uint32_t smpSize = device.GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - auto descPtr = allocator.handle(heap[0]); - auto smpPtr = allocator.handle(heap[1]); + auto descPtr = allocator.handle(heap[HEAP_RES]); + auto smpPtr = allocator.handle(heap[HEAP_SMP]); for(size_t id=0; idlay.size(); ++id) { auto& prm = lay.handler->prm[id]; @@ -495,8 +491,6 @@ void DxDescriptorArray::placeInHeap(ID3D12Device& device, D3D12_DESCRIPTOR_RANGE desc.Texture2D.MipSlice = mipLevel; } - // auto gpu = val.cpu[prm.heapId]; - // gpu.ptr += (prm.heapOffset + i*descSize); auto gpu = at; gpu.ptr += heapOffset; device.CreateUnorderedAccessView(t.impl.get(),nullptr,&desc,gpu); diff --git a/Engine/gapi/directx12/dxdescriptorarray.h b/Engine/gapi/directx12/dxdescriptorarray.h index ceb8b352..22c9988d 100644 --- a/Engine/gapi/directx12/dxdescriptorarray.h +++ b/Engine/gapi/directx12/dxdescriptorarray.h @@ -19,6 +19,17 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc { DxDescriptorArray(DxDescriptorArray&& other); ~DxDescriptorArray(); + enum { + HEAP_RES = DxPipelineLay::HEAP_RES, + HEAP_SMP = DxPipelineLay::HEAP_SMP, + HEAP_MAX = DxPipelineLay::HEAP_MAX, + ALLOC_GRANULARITY = 4, + }; + + struct CbState { + ID3D12DescriptorHeap* heaps[HEAP_MAX] = {}; + }; + void set (size_t id, AbstractGraphicsApi::Texture *tex, const Sampler& smp, uint32_t mipLevel) override; void set (size_t id, AbstractGraphicsApi::Buffer* buf, size_t offset) override; void set (size_t id, const Sampler& smp) override; @@ -29,19 +40,14 @@ class DxDescriptorArray : public AbstractGraphicsApi::Desc { void ssboBarriers(Detail::ResourceState& res, PipelineStage st) override; - void bind(ID3D12GraphicsCommandList6& enc, ID3D12DescriptorHeap** currentHeaps, bool isCompute); + void bind(ID3D12GraphicsCommandList6& enc, CbState& state, bool isCompute); DSharedPtr lay; private: - enum { - HEAP_RES = DxPipelineLay::HEAP_RES, - HEAP_SMP = DxPipelineLay::HEAP_SMP, - HEAP_MAX = DxPipelineLay::HEAP_MAX, - }; using Allocation = DxDescriptorAllocator::Allocation; - void reallocSet(size_t id, uint32_t newRuntimeSz); + void reallocSet(size_t id, size_t newRuntimeSz); void reflushSet(); void placeInHeap(ID3D12Device& device, D3D12_DESCRIPTOR_RANGE_TYPE rgn, const D3D12_CPU_DESCRIPTOR_HANDLE& at, diff --git a/Engine/gapi/directx12/dxdevice.cpp b/Engine/gapi/directx12/dxdevice.cpp index 86d6d763..4c358476 100644 --- a/Engine/gapi/directx12/dxdevice.cpp +++ b/Engine/gapi/directx12/dxdevice.cpp @@ -190,6 +190,29 @@ void DxDevice::getProp(DXGI_ADAPTER_DESC1& desc, ID3D12Device& dev, AbstractGrap prop.type = arch.UMA ? DeviceType::Integrated : DeviceType::Discrete; } + D3D12_FEATURE_DATA_D3D12_OPTIONS feature0 = {}; + if(SUCCEEDED(dev.CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &feature0, sizeof(feature0)))) { + // feature0.ResourceHeapTier; // TODO: check use cases + + // TODO: expose per-stage limits + uint32_t maxSamplers = 16; + uint32_t maxResources = 16; + switch(feature0.ResourceBindingTier) { + case D3D12_RESOURCE_BINDING_TIER_1: + maxSamplers = 16; + maxResources = 8; + break; + case D3D12_RESOURCE_BINDING_TIER_2: + maxSamplers = 2048; + maxResources = 64; + break; + case D3D12_RESOURCE_BINDING_TIER_3: + maxSamplers = 2048; + maxResources = -1; + break; + } + } + D3D12_FEATURE_DATA_D3D12_OPTIONS5 feature5 = {}; if(SUCCEEDED(dev.CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &feature5, sizeof(feature5)))) { prop.raytracing.rayQuery = (feature5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1); diff --git a/Engine/gapi/directx12/dxpipelinelay.h b/Engine/gapi/directx12/dxpipelinelay.h index 6a2e77bf..f4a0c844 100644 --- a/Engine/gapi/directx12/dxpipelinelay.h +++ b/Engine/gapi/directx12/dxpipelinelay.h @@ -31,12 +31,9 @@ class DxPipelineLay : public AbstractGraphicsApi::PipelineLay { using Binding = ShaderReflection::Binding; enum { - HEAP_RES = 0, - HEAP_SMP = 1, - HEAP_MAX = 2, - - POOL_SIZE = 128, - BINDLESS_GRANULARITY = 256, + HEAP_RES = 0, + HEAP_SMP = 1, + HEAP_MAX = 2, }; struct Param {