Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

video_core: added support for indirect dispatches (gfx only) #637

Merged
merged 1 commit into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/video_core/amdgpu/liverpool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,22 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
}
break;
}
// Handles a DispatchIndirect packet on the graphics queue: the packet carries only a byte
// offset, which is combined with the base address previously stored in the queue's
// indirect_args_addr to locate the dispatch arguments.
case PM4ItOpcode::DispatchIndirect: {
const auto* dispatch_indirect =
reinterpret_cast<const PM4CmdDispatchIndirect*>(header);
const auto offset = dispatch_indirect->data_offset;
const auto ib_address = mapped_queues[GfxQueueId].indirect_args_addr;
// The argument block is exactly one GroupDimensions structure (three u32 values).
const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
// Dispatch only when a rasterizer is attached and bit 0 of the dispatch initiator register
// is set.
// NOTE(review): this tests regs.cs_program.dispatch_initiator, not the dispatch_initiator
// field carried by this packet — confirm that is intended.
if (rasterizer && (regs.cs_program.dispatch_initiator & 1)) {
// The packet's host address doubles as the debug label and the breadcrumb value.
const auto cmd_address = reinterpret_cast<const void*>(header);
rasterizer->ScopeMarkerBegin(
fmt::format("dcb:{}:DispatchIndirect", cmd_address));
rasterizer->Breadcrumb(u64(cmd_address));
rasterizer->DispatchIndirect(ib_address, offset, size);
rasterizer->ScopeMarkerEnd();
}
break;
}
case PM4ItOpcode::NumInstances: {
const auto* num_instances = reinterpret_cast<const PM4CmdDrawNumInstances*>(header);
regs.num_instances.num_instances = num_instances->num_instances;
Expand All @@ -399,6 +415,12 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
regs.num_indices = index_size->num_indices;
break;
}
// Handles a SetBase packet: records the base address carried by the packet in the graphics
// queue's indirect_args_addr.
case PM4ItOpcode::SetBase: {
const auto* set_base = reinterpret_cast<const PM4CmdSetBase*>(header);
// Only the DrawIndexIndirPatchTable base index is accepted here; any other value trips
// the assertion.
ASSERT(set_base->base_index == PM4CmdSetBase::BaseIndex::DrawIndexIndirPatchTable);
mapped_queues[GfxQueueId].indirect_args_addr = set_base->Address<u64>();
break;
}
case PM4ItOpcode::EventWrite: {
// const auto* event = reinterpret_cast<const PM4CmdEventWrite*>(header);
break;
Expand Down
1 change: 1 addition & 0 deletions src/video_core/amdgpu/liverpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ struct Liverpool {
std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{};
ComputeProgram cs_state{};
VAddr indirect_args_addr{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};

Expand Down
36 changes: 36 additions & 0 deletions src/video_core/amdgpu/pm4_cmds.h
Original file line number Diff line number Diff line change
Expand Up @@ -704,4 +704,40 @@ struct PM4CmdReleaseMem {
}
};

struct PM4CmdSetBase {
enum class BaseIndex : u32 {
DisplayListPatchTable = 0b0000,
DrawIndexIndirPatchTable = 0b0001,
GdsPartition = 0b0010,
CePartition = 0b0011,
};

PM4Type3Header header;
union {
BitField<0, 4, BaseIndex> base_index;
u32 dw1;
};
u32 address0;
u32 address1;

template <typename T>
T Address() const {
ASSERT(base_index == BaseIndex::DisplayListPatchTable ||
base_index == BaseIndex::DrawIndexIndirPatchTable);
return reinterpret_cast<T>(address0 | (u64(address1 & 0xffff) << 32u));
}
};

/// Packet layout used for PM4ItOpcode::DispatchIndirect: a type-3 header followed by the
/// offset of the argument structure and a dispatch initiator value.
struct PM4CmdDispatchIndirect {
/// Argument structure located at data_offset; three consecutive u32 values, one per axis.
struct GroupDimensions {
u32 dim_x;
u32 dim_y;
u32 dim_z;
};

PM4Type3Header header;
u32 data_offset; ///< Byte aligned offset where the required data structure starts
u32 dispatch_initiator; ///< Dispatch Initiator Register
};

} // namespace AmdGpu
3 changes: 2 additions & 1 deletion src/video_core/buffer_cache/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ constexpr vk::BufferUsageFlags AllFlags =
vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer |
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer;
vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndirectBuffer;

std::string_view BufferTypeName(MemoryUsage type) {
switch (type) {
Expand Down
39 changes: 39 additions & 0 deletions src/video_core/renderer_vulkan/vk_rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,45 @@ void Rasterizer::DispatchDirect() {
cmdbuf.dispatch(cs_program.dim_x, cs_program.dim_y, cs_program.dim_z);
}

/// Records a compute dispatch whose group counts are read from a buffer instead of being
/// supplied directly.
///
/// Nothing is recorded when no compute pipeline is available or when the pipeline reports
/// that it has no resources to bind.
///
/// @param address Base guest address of the indirect arguments.
/// @param offset  Byte offset from `address` at which the argument structure starts.
/// @param size    Size in bytes of the argument structure.
void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) {
    RENDERER_TRACE;

    const auto cmdbuf = scheduler.CommandBuffer();
    const ComputePipeline* pipeline = pipeline_cache.GetComputePipeline();
    if (!pipeline) {
        return;
    }

    try {
        const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
        if (!has_resources) {
            return;
        }
    } catch (...) {
        // Any exception escaping resource binding is treated as a fatal logic error.
        UNREACHABLE();
    }

    scheduler.EndRendering();
    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle());

    // Request the range that actually holds the arguments. Asking for
    // [address, address + size) and adding `offset` afterwards would read outside the
    // requested range whenever `offset` is non-zero.
    // NOTE(review): the third argument is passed through unchanged; confirm against
    // ObtainBuffer's signature that `true` is right for a buffer that is only read here.
    const auto [buffer, args_offset] = buffer_cache.ObtainBuffer(address + offset, size, true);

    // Emulate PFP-to-ME sync packet
    const vk::BufferMemoryBarrier ib_barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eIndirectCommandRead,
        .buffer = buffer->Handle(),
        .offset = args_offset,
        .size = size,
    };
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                           vk::PipelineStageFlagBits::eDrawIndirect,
                           vk::DependencyFlagBits::eByRegion, {}, ib_barrier, {});

    cmdbuf.dispatchIndirect(buffer->Handle(), args_offset);
}

u64 Rasterizer::Flush() {
const u64 current_tick = scheduler.CurrentTick();
SubmitInfo info{};
Expand Down
1 change: 1 addition & 0 deletions src/video_core/renderer_vulkan/vk_rasterizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ class Rasterizer {
void Draw(bool is_indexed, u32 index_offset = 0);

void DispatchDirect();
void DispatchIndirect(VAddr address, u32 offset, u32 size);

void ScopeMarkerBegin(const std::string_view& str);
void ScopeMarkerEnd();
Expand Down
Loading