From c2f61c909c845b973462e9738f39e405ab2b1273 Mon Sep 17 00:00:00 2001 From: Try Date: Thu, 11 Apr 2024 22:50:56 +0200 Subject: [PATCH] command split for DX12 #27 --- Engine/gapi/abstractgraphicsapi.h | 1 + Engine/gapi/directx12/comptr.h | 5 ++ Engine/gapi/directx12/dxcommandbuffer.cpp | 64 +++++++++++++++++++---- Engine/gapi/directx12/dxcommandbuffer.h | 8 +++ Engine/gapi/directx12/dxdevice.cpp | 14 +++-- Engine/gapi/directx12/dxdevice.h | 1 - Engine/gapi/directx12/dxpipeline.cpp | 3 -- Engine/gapi/directx12api.cpp | 12 +++-- Engine/gapi/vulkan/vcommandbuffer.cpp | 8 ++- Engine/gapi/vulkan/vdevice.cpp | 4 +- 10 files changed, 94 insertions(+), 26 deletions(-) diff --git a/Engine/gapi/abstractgraphicsapi.h b/Engine/gapi/abstractgraphicsapi.h index 24e54807..faff9ee1 100644 --- a/Engine/gapi/abstractgraphicsapi.h +++ b/Engine/gapi/abstractgraphicsapi.h @@ -93,6 +93,7 @@ namespace Tempest { enum : uint8_t { MaxFramebufferAttachments = 8+1, MaxBarriers = 64, + MaxCmdChunks = 64, }; enum Topology : uint8_t { diff --git a/Engine/gapi/directx12/comptr.h b/Engine/gapi/directx12/comptr.h index 653493fb..9d5c38fa 100644 --- a/Engine/gapi/directx12/comptr.h +++ b/Engine/gapi/directx12/comptr.h @@ -22,6 +22,11 @@ class ComPtr final { T& operator * () { return *p; } T*& get() { return p; } T* get() const { return p; } + T* release() { + auto ret = p; + p = nullptr; + return ret; + } private: T* p=nullptr; diff --git a/Engine/gapi/directx12/dxcommandbuffer.cpp b/Engine/gapi/directx12/dxcommandbuffer.cpp index 6f1d3f8e..cbced514 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.cpp +++ b/Engine/gapi/directx12/dxcommandbuffer.cpp @@ -20,7 +20,7 @@ using namespace Tempest::Detail; static void beginEvent(ID3D12GraphicsCommandList& cmd, uint32_t meta, const wchar_t* buf) { // NOTE: pix is too much trouble to integrate - cmd.BeginEvent(meta, buf, std::wcslen(buf)*sizeof(wchar_t)); + cmd.BeginEvent(meta, buf, UINT(std::wcslen(buf)*sizeof(wchar_t))); } static void endEvent(ID3D12GraphicsCommandList& cmd) { @@ -375,18 +375,20 @@ struct DxCommandBuffer::FillUAV : Stage { DxCommandBuffer::DxCommandBuffer(DxDevice& d) : dev(d) { - D3D12_COMMAND_LIST_TYPE type = D3D12_COMMAND_LIST_TYPE_DIRECT; - dxAssert(d.device->CreateCommandAllocator(type, + dxAssert(d.device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, uuid(), reinterpret_cast(&pool))); - - dxAssert(d.device->CreateCommandList(0, type, pool.get(), nullptr, - uuid(), reinterpret_cast(&impl))); - impl->Close(); } DxCommandBuffer::~DxCommandBuffer() { clearStage(); + auto node = chunks.begin(); + for(size_t i=0; ival[i%chunks.chunkSize].impl; + dxAssert(cmd->Release()); + if(i+1==chunks.chunkSize) + node = node->next; + } } void DxCommandBuffer::begin(bool transfer) { @@ -394,6 +396,10 @@ void DxCommandBuffer::begin(bool transfer) { state = Idle; if(transfer) resState.clearReaders(); + + if(impl.get()==nullptr) { + newChunk(); + } } void DxCommandBuffer::begin() { @@ -406,19 +412,32 @@ void DxCommandBuffer::end() { isDbgRegion = false; } resState.finalize(*this); - - dxAssert(impl->Close()); state = NoRecording; resetDone = false; - curHeaps = DxDescriptorArray::CbState{}; + + pushChunk(); } void DxCommandBuffer::reset() { if(resetDone) return; clearStage(); + dxAssert(pool->Reset()); - dxAssert(impl->Reset(pool.get(),nullptr)); + SmallArray flat(chunks.size()); + auto node = chunks.begin(); + if(chunks.size()>0) { + impl = ComPtr(node->val[0].impl); + dxAssert(impl->Reset(pool.get(),nullptr)); + } + for(size_t i=1; ival[i%chunks.chunkSize].impl; + cmd->Release(); + // dxAssert(cmd->Reset(pool.get(),nullptr)); + if(i+1==chunks.chunkSize) + node = node->next; + } + chunks.clear(); resetDone = true; } @@ -432,6 +451,10 @@ void DxCommandBuffer::beginRendering(const AttachmentDesc* desc, size_t descSize resState.joinWriters(PipelineStage::S_Graphics); resState.setRenderpass(*this,desc,descSize,frm,att,sw,imgId); + if(state!=Idle) { + newChunk(); + } + D3D12_RENDER_PASS_RENDER_TARGET_DESC view[MaxFramebufferAttachments] = {}; UINT viewSz = 0; D3D12_RENDER_PASS_DEPTH_STENCIL_DESC zdesc = {}; @@ -860,6 +883,25 @@ void DxCommandBuffer::buildTlas(AbstractGraphicsApi::Buffer& tbo, impl->BuildRaytracingAccelerationStructure(&desc,0,nullptr); } +void DxCommandBuffer::pushChunk() { + if(impl.get()!=nullptr) { + dxAssert(impl->Close()); + Chunk ch; + ch.impl = impl.release(); + chunks.push(ch); + + impl = nullptr; + curHeaps = DxDescriptorArray::CbState{}; + } + } + +void DxCommandBuffer::newChunk() { + pushChunk(); + + dxAssert(dev.device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, pool.get(), nullptr, + uuid(), reinterpret_cast(&impl))); + } + void DxCommandBuffer::clearStage() { while(stageResources!=nullptr) { auto s = stageResources; diff --git a/Engine/gapi/directx12/dxcommandbuffer.h b/Engine/gapi/directx12/dxcommandbuffer.h index ba3499bf..0a724062 100644 --- a/Engine/gapi/directx12/dxcommandbuffer.h +++ b/Engine/gapi/directx12/dxcommandbuffer.h @@ -89,6 +89,11 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer { ID3D12GraphicsCommandList* get() { return impl.get(); } + struct Chunk { + ID3D12GraphicsCommandList6* impl = nullptr; + }; + Detail::SmallList chunks; + private: DxDevice& dev; ComPtr pool; @@ -120,6 +125,9 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer { std::unordered_set indirectCmd; + void pushChunk(); + void newChunk(); + void prepareDraw(size_t voffset, size_t firstInstance); void clearStage(); void pushStage(Stage* cmd); diff --git a/Engine/gapi/directx12/dxdevice.cpp b/Engine/gapi/directx12/dxdevice.cpp index 03eacd5b..7db9bc8a 100644 --- a/Engine/gapi/directx12/dxdevice.cpp +++ b/Engine/gapi/directx12/dxdevice.cpp @@ -296,12 +296,20 @@ void Detail::DxDevice::waitIdle() { dxAssert(idleFence->Signal(DxFence::Waiting)); } -void DxDevice::submit(DxCommandBuffer& cmdBuffer, DxFence* sync) { +void DxDevice::submit(DxCommandBuffer& cmd, DxFence* sync) { sync->reset(); + const size_t size = cmd.chunks.size(); + SmallArray flat(size); + auto node = cmd.chunks.begin(); + for(size_t i=0; ival[i%cmd.chunks.chunkSize].impl; + if(i+1==cmd.chunks.chunkSize) + node = node->next; + } + std::lock_guard guard(syncCmdQueue); - ID3D12CommandList* cmd[] = {cmdBuffer.get()}; - cmdQueue->ExecuteCommandLists(1, cmd); + cmdQueue->ExecuteCommandLists(UINT(size), flat.get()); sync->signal(*cmdQueue); } diff --git a/Engine/gapi/directx12/dxdevice.h b/Engine/gapi/directx12/dxdevice.h index b70f12cd..ce0aec31 100644 --- a/Engine/gapi/directx12/dxdevice.h +++ b/Engine/gapi/directx12/dxdevice.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include diff --git a/Engine/gapi/directx12/dxpipeline.cpp b/Engine/gapi/directx12/dxpipeline.cpp index 57e7b8f8..463db9ff 100644 --- a/Engine/gapi/directx12/dxpipeline.cpp +++ b/Engine/gapi/directx12/dxpipeline.cpp @@ -199,9 +199,6 @@ ComPtr DxPipeline::initGraphicsPipeline(const DxFboLayout& ComPtr ret; auto err = device.device->CreateGraphicsPipelineState(&psoDesc, uuid(), reinterpret_cast(&ret.get())); if(FAILED(err)) { - for(auto& i:modules) - if(i.handler!=nullptr) - ;//i.handler->disasm(); dxAssert(err); } return ret; diff --git a/Engine/gapi/directx12api.cpp b/Engine/gapi/directx12api.cpp index 12a355bc..385f0c97 100644 --- a/Engine/gapi/directx12api.cpp +++ b/Engine/gapi/directx12api.cpp @@ -445,10 +445,14 @@ void DirectX12Api::present(AbstractGraphicsApi::Device* d, AbstractGraphicsApi:: sx.queuePresent(); } -void DirectX12Api::submit(AbstractGraphicsApi::Device* d, AbstractGraphicsApi::CommandBuffer* cmd, AbstractGraphicsApi::Fence* doneCpu) { - Detail::DxCommandBuffer& bx = *reinterpret_cast(cmd); - ID3D12CommandList* cmdList[] = { bx.get() }; - impl->submit(d,cmdList,1,doneCpu); +void DirectX12Api::submit(AbstractGraphicsApi::Device* d, + AbstractGraphicsApi::CommandBuffer* cx, + AbstractGraphicsApi::Fence* doneCpu) { + auto& dx = *reinterpret_cast(d); + auto& sync = *reinterpret_cast(doneCpu); + auto& cmd = *reinterpret_cast(cx); + + dx.submit(cmd, &sync); } void DirectX12Api::getCaps(AbstractGraphicsApi::Device* d, AbstractGraphicsApi::Props& caps) { diff --git a/Engine/gapi/vulkan/vcommandbuffer.cpp b/Engine/gapi/vulkan/vcommandbuffer.cpp index 324aafeb..4c33ac97 100644 --- a/Engine/gapi/vulkan/vcommandbuffer.cpp +++ b/Engine/gapi/vulkan/vcommandbuffer.cpp @@ -187,10 +187,14 @@ VCommandBuffer::VCommandBuffer(VDevice& device, VkCommandPoolCreateFlags flags) } VCommandBuffer::~VCommandBuffer() { + if(impl!=nullptr) { + vkFreeCommandBuffers(device.device.impl,pool.impl,1,&impl); + } + if(chunks.size()==0) return; - SmallArray flat(chunks.size()); + SmallArray flat(chunks.size()); auto node = chunks.begin(); for(size_t i=0; ival[i%chunks.chunkSize].impl; @@ -203,7 +207,7 @@ VCommandBuffer::~VCommandBuffer() { void VCommandBuffer::reset() { vkAssert(vkResetCommandPool(device.device.impl,pool.impl,0)); - SmallArray flat(chunks.size()); + SmallArray flat(chunks.size()); auto node = chunks.begin(); if(chunks.size()>0) { impl = node->val[0].impl; diff --git a/Engine/gapi/vulkan/vdevice.cpp b/Engine/gapi/vulkan/vdevice.cpp index ed6ce299..6e55e775 100644 --- a/Engine/gapi/vulkan/vdevice.cpp +++ b/Engine/gapi/vulkan/vdevice.cpp @@ -534,7 +534,7 @@ void VDevice::submit(VCommandBuffer& cmd, VFence* sync) { wait2[i].stageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; wait2[i].deviceIndex = 0; } - SmallArray flat(cmd.chunks.size()); + SmallArray flat(cmd.chunks.size()); auto node = cmd.chunks.begin(); for(size_t i=0; i flat(cmd.chunks.size()); + SmallArray flat(cmd.chunks.size()); auto node = cmd.chunks.begin(); for(size_t i=0; ival[i%cmd.chunks.chunkSize].impl;