Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented indirect dispatch #66

Merged
merged 9 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Engine/gapi/abstractgraphicsapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ namespace Tempest {
virtual void dispatchMeshIndirect(const Buffer& indirect, size_t offset);

virtual void dispatch(size_t x, size_t y, size_t z) = 0;
virtual void dispatchIndirect(const Buffer& indirect, size_t offset) = 0;
};

using PBuffer = Detail::DSharedPtr<Buffer*>;
Expand Down
73 changes: 45 additions & 28 deletions Engine/gapi/directx12/dxcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,23 @@ void DxCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
impl->Dispatch(UINT(x),UINT(y),UINT(z));
}

void DxCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
const DxBuffer& ind = reinterpret_cast<const DxBuffer&>(indirect);
auto& sign = dev.dispatchIndirectSgn.get();

curUniforms->ssboBarriers(resState, PipelineStage::S_Compute);

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
resState.flush(*this);

issueExplicitCommonToIndirectStateTransition(ind.impl.get());
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are 2 issues with this code:

  • you do both explicit synchronization and implicit (via onUavUsage)
  • resource barrier is quite expensive, and calling it per-draw is not a good call


impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);

issueExplicitIndirectToCommonStateTransition(ind.impl.get());
}

void DxCommandBuffer::setPipeline(Tempest::AbstractGraphicsApi::Pipeline& p) {
DxPipeline& px = reinterpret_cast<DxPipeline&>(p);
pushBaseInstanceId = px.pushBaseInstanceId;
Expand Down Expand Up @@ -617,14 +634,7 @@ void DxCommandBuffer::implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompu

void DxCommandBuffer::restoreIndirect() {
for(auto& i:indirectCmd) {
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = i;
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
issueExplicitIndirectToCommonStateTransition(i);
}
indirectCmd.clear();
}
Expand Down Expand Up @@ -743,21 +753,14 @@ void DxCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect,
auto& sign = dev.drawIndirectSgn.get();

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);

if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) {
indirectCmd.insert(ind.impl.get());

D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = ind.impl.get();
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
issueExplicitCommonToIndirectStateTransition(ind.impl.get());
}

impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);
}

Expand All @@ -770,19 +773,11 @@ void DxCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& in
auto& sign = dev.drawMeshIndirectSgn.get();

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);

if(true && indirectCmd.find(ind.impl.get())==indirectCmd.end()) {
indirectCmd.insert(ind.impl.get());

D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = ind.impl.get();
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
issueExplicitCommonToIndirectStateTransition(ind.impl.get());
}
impl->ExecuteIndirect(sign, 1, ind.impl.get(), UINT64(offset), nullptr, 0);
}
Expand Down Expand Up @@ -987,6 +982,28 @@ void DxCommandBuffer::copyNative(AbstractGraphicsApi::Buffer& dstBuf, size_t off
impl->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr);
}

void DxCommandBuffer::issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter)
{
D3D12_RESOURCE_BARRIER barrier;
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
barrier.Transition.pResource = buf;
barrier.Transition.StateAfter = stateAfter;
barrier.Transition.StateBefore = stateBefore;
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
impl->ResourceBarrier(1, &barrier);
}

void DxCommandBuffer::issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf)
{
issueExplicitResourceStateTransition(buf, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
}

void DxCommandBuffer::issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf)
{
issueExplicitResourceStateTransition(buf, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_COMMON);
}

#endif


5 changes: 5 additions & 0 deletions Engine/gapi/directx12/dxcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect (const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;

Expand Down Expand Up @@ -133,6 +134,10 @@ class DxCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void pushStage(Stage* cmd);
void implSetUniforms(AbstractGraphicsApi::Desc& u, bool isCompute);
void restoreIndirect();

void issueExplicitResourceStateTransition(ID3D12Resource* buf, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter);
void issueExplicitCommonToIndirectStateTransition(ID3D12Resource* buf);
void issueExplicitIndirectToCommonStateTransition(ID3D12Resource* buf);
};

}
Expand Down
4 changes: 4 additions & 0 deletions Engine/gapi/directx12/dxdevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ DxDevice::DxDevice(IDXGIAdapter1& adapter, const ApiEntry& dllApi)
arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH;
dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid<ID3D12CommandSignature>(), reinterpret_cast<void**>(&drawMeshIndirectSgn)));
}

arg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH;
desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
dxAssert(device->CreateCommandSignature(&desc, nullptr, uuid<ID3D12CommandSignature>(), reinterpret_cast<void**>(&dispatchIndirectSgn)));
}

allocator.setDevice(*this);
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/directx12/dxdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ class DxDevice : public AbstractGraphicsApi::Device {

ComPtr<ID3D12CommandSignature> drawIndirectSgn;
ComPtr<ID3D12CommandSignature> drawMeshIndirectSgn;
ComPtr<ID3D12CommandSignature> dispatchIndirectSgn;

DxAllocator allocator;
DxDescriptorAllocator descAlloc;
Expand Down
3 changes: 2 additions & 1 deletion Engine/gapi/flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ enum PipelineStage : uint8_t {
S_RtAs,
S_Compute,
S_Graphics,
S_Indirect,

S_First = S_Transfer,
S_Count = S_Graphics+1,
S_Count = S_Indirect+1,
};

}
5 changes: 5 additions & 0 deletions Engine/gapi/metal/mtcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@ void MtCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
encComp->dispatchThreadgroups(MTL::Size(x,y,z), localSize);
}

void MtCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
auto& ind = reinterpret_cast<const MtBuffer&>(indirect);
encDraw->dispatchThreadgroups(ind.impl.get(), offset, localSize);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

encComp

}

void MtCommandBuffer::implSetBytes(const void* bytes, size_t sz) {
auto& mtl = curLay->bindPush;
auto& l = curLay->pb;
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/metal/mtcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class MtCommandBuffer : public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier (const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;
void generateMipmap(AbstractGraphicsApi::Texture& image, uint32_t texWidth, uint32_t texHeight, uint32_t mipLevels) override;
Expand Down
6 changes: 2 additions & 4 deletions Engine/gapi/resourcestate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ void ResourceState::onUavUsage(NonUniqResId read, NonUniqResId write, PipelineSt
}

void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) {
const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr};
const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr};
const ResourceAccess rd[PipelineStage::S_Count] = {ResourceAccess::TransferSrc, ResourceAccess::RtAsRead, ResourceAccess::UavReadComp, ResourceAccess::UavReadGr, ResourceAccess::Indirect};
const ResourceAccess wr[PipelineStage::S_Count] = {ResourceAccess::TransferDst, ResourceAccess::RtAsWrite, ResourceAccess::UavWriteComp, ResourceAccess::UavWriteGr, ResourceAccess::None};
const ResourceAccess hv = (host ? ResourceAccess::TransferHost : ResourceAccess::None);

for(PipelineStage p = PipelineStage::S_First; p<PipelineStage::S_Count; p = PipelineStage(p+1)) {
Expand All @@ -86,8 +86,6 @@ void ResourceState::onUavUsage(const Usage& u, PipelineStage st, bool host) {
uavSrcBarrier = uavSrcBarrier | rd[p] | wr[p];
uavDstBarrier = uavDstBarrier | rd[st] | wr[st];

uavDstBarrier = uavDstBarrier | ResourceAccess::Indirect;

uavRead [st].depend[p] = NonUniqResId::I_None;
uavWrite[st].depend[p] = NonUniqResId::I_None;
}
Expand Down
15 changes: 13 additions & 2 deletions Engine/gapi/vulkan/vcommandbuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,17 @@ void VCommandBuffer::dispatch(size_t x, size_t y, size_t z) {
vkCmdDispatch(impl,uint32_t(x),uint32_t(y),uint32_t(z));
}

void VCommandBuffer::dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) {
const VBuffer& ind = reinterpret_cast<const VBuffer&>(indirect);

curUniforms->ssboBarriers(resState, PipelineStage::S_Compute);
// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
resState.flush(*this);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What barrier will be emitted, if buffer is in used by both shader and indirect argument?
Should be SRC -> CS|indirect, plus buffer has to be in read-state.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created test that simulates this situation:
barrier {UavReadComp | UavWriteComp -> UavReadComp | UavWriteComp | Indirect}

Propably stupid question :) but does read-state here refer to the condition that previous memory accesses for the buffer are available and visible for reading (i.e. that required barrier exists)? My first thought was about some internal state in the engine but couldn't find it.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean ResourceState::uavRead. Need to make sure that subsequent command are aware that indirect-buffer is in use.
What I'm worry about, that resState.flush might reset tracking preemptively.

Basically in scenario

dispath( write to buf )
--- barrier {UavReadComp | UavWriteComp -> UavReadComp | UavWriteComp | Indirect}
dispathIndirect(buf)
--- should be execution barrier indirect -> CS
dispath( write to buf )

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added subsequent write access to the test, works fine:
barrier {TransferSrc | UavReadComp | UavReadGr | RtAsRead | Indirect -> UavWriteComp}
barrier {UavReadComp | UavWriteComp -> UavReadComp | UavWriteComp | Indirect}
barrier {UavReadComp | Indirect -> UavWriteComp}

Despite clearing uavSrcBarrier and uavDstBarrier in flush(), there is a dependency on previous reads in uavRead which was written in the end of onUavUsage(read, none, compute | indirect) calls. uavRead and uavWrite are cleared only for dependencies that were added to uavSrcBarrier and uavDstBarrier


vkCmdDispatchIndirect(impl, ind.impl, VkDeviceSize(offset));
}

void VCommandBuffer::setBytes(AbstractGraphicsApi::CompPipeline& p, const void* data, size_t size) {
VCompPipeline& px=reinterpret_cast<VCompPipeline&>(p);
assert(size<=px.pushSize);
Expand Down Expand Up @@ -509,7 +520,7 @@ void VCommandBuffer::drawIndirect(const AbstractGraphicsApi::Buffer& indirect, s
const VBuffer& ind = reinterpret_cast<const VBuffer&>(indirect);

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);
vkCmdDrawIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0);
}
Expand All @@ -522,7 +533,7 @@ void VCommandBuffer::dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& ind
const VBuffer& ind = reinterpret_cast<const VBuffer&>(indirect);

// block future writers
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Graphics);
resState.onUavUsage(ind.nonUniqId, NonUniqResId::I_None, PipelineStage::S_Indirect);
//resState.flush(*this);
device.vkCmdDrawMeshTasksIndirect(impl, ind.impl, VkDeviceSize(offset), 1, 0);
}
Expand Down
1 change: 1 addition & 0 deletions Engine/gapi/vulkan/vcommandbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class VCommandBuffer:public AbstractGraphicsApi::CommandBuffer {
void dispatchMeshIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void dispatch (size_t x, size_t y, size_t z) override;
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override;

void barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) override;

Expand Down
6 changes: 6 additions & 0 deletions Engine/graphics/encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ void Encoder<Tempest::CommandBuffer>::dispatchThreads(Size sz) {
dispatchThreads(size_t(sz.w), size_t(sz.h), 1);
}

void Encoder<CommandBuffer>::dispatchIndirect(const StorageBuffer& indirect, size_t offset) {
if (offset % 4 != 0)
throw std::system_error(Tempest::GraphicsErrc::InvalidStorageBuffer);
impl->dispatchIndirect(*indirect.impl.impl.handler, offset);
}

void Encoder<CommandBuffer>::setFramebuffer(std::initializer_list<AttachmentDesc> rd, AttachmentDesc zd) {
implSetFramebuffer(rd.begin(),rd.size(),&zd);
}
Expand Down
1 change: 1 addition & 0 deletions Engine/graphics/encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class Encoder<Tempest::CommandBuffer> {
void dispatchMeshThreads(Size sz);

void dispatch(size_t x, size_t y=1, size_t z=1);
void dispatchIndirect(const StorageBuffer& indirect, size_t offset);
void dispatchThreads(size_t x, size_t y=1, size_t z=1);
void dispatchThreads(Size sz);

Expand Down
13 changes: 13 additions & 0 deletions Tests/tests/resourcestate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ struct TestCommandBuffer : Tempest::AbstractGraphicsApi::CommandBuffer {
void drawIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {}

void dispatch (size_t x, size_t y, size_t z) override {}
void dispatchIndirect(const AbstractGraphicsApi::Buffer& indirect, size_t offset) override {}
};

void TestCommandBuffer::barrier(const AbstractGraphicsApi::BarrierDesc* desc, size_t cnt) {
Expand Down Expand Up @@ -171,5 +172,17 @@ TEST(main, ResourceStateBlas) {
rs.flush(cmd);
}

TEST(main, ResourceStateIndirect) {
TestCommandBuffer cmd;

ResourceState rs;

rs.onUavUsage(NonUniqResId::I_None, NonUniqResId(0x1), PipelineStage::S_Compute);
rs.flush(cmd);

rs.onUavUsage(NonUniqResId(0x1), NonUniqResId::I_None, PipelineStage::S_Indirect);
rs.flush(cmd);
Try marked this conversation as resolved.
Show resolved Hide resolved
}