diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp
index 5f2d2eb72c7..f597fe8d418 100644
--- a/backends/vulkan/runtime/api/Context.cpp
+++ b/backends/vulkan/runtime/api/Context.cpp
@@ -101,6 +101,24 @@ void Context::register_shader_dispatch(
   cmd_.dispatch(effective_global_wg);
 }

+
+void Context::register_copy(
+    PipelineBarrier& pipeline_barrier,
+    const VulkanImage& src,
+    const VulkanImage& dst,
+    const api::utils::uvec3& copy_range,
+    const api::utils::uvec3& src_offset,
+    const api::utils::uvec3& dst_offset) {
+  cmd_.insert_barrier(pipeline_barrier);
+  cmd_.copy_texture_to_texture(
+      src,
+      dst,
+      copy_range,
+      src_offset,
+      dst_offset);
+}
+
+
 void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
   if (cmd_) {
     cmd_.end();
diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h
index 0813d4190de..9657f4b2d95 100644
--- a/backends/vulkan/runtime/api/Context.h
+++ b/backends/vulkan/runtime/api/Context.h
@@ -180,6 +180,14 @@ class Context final {
       const ShaderInfo&,
       const utils::uvec3&);

+  void register_copy(
+      PipelineBarrier&,
+      const VulkanImage& src,
+      const VulkanImage& dst,
+      const api::utils::uvec3& copy_range,
+      const api::utils::uvec3& src_offset,
+      const api::utils::uvec3& dst_offset);
+
   template <class S, class D>
   bool submit_copy(
       PipelineBarrier&,
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
index 08a17a18872..9fd19ccf336 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.cpp
@@ -33,14 +33,28 @@ ExecuteNode::ExecuteNode(
   graph.update_descriptor_counts(shader, /*execute = */ true);
 }

-void ExecuteNode::encode(ComputeGraph* graph) {
+ExecuteNode::ExecuteNode(
+    ComputeGraph& graph,
+    const ArgGroup& src,
+    const ArgGroup& dst,
+    const api::utils::uvec3& copy_range,
+    const api::utils::uvec3& src_offset,
+    const api::utils::uvec3& dst_offset)
+    : src_(src),
+      dst_(dst), copy_range_(copy_range),
+      src_offset_(src_offset), dst_offset_(dst_offset) {
+  // TODO: Update descriptor counts in graph.
+}
+
+
+void ExecuteNode::encode_shader(ComputeGraph* graph) {
   api::Context* const context = graph->context();
   api::PipelineBarrier pipeline_barrier{};

   std::unique_lock<std::mutex> cmd_lock = context->dispatch_lock();

   api::DescriptorSet descriptor_set =
-      context->get_descriptor_set(shader_, local_workgroup_size_);
+      context->get_descriptor_set(shader_, *local_workgroup_size_);

   uint32_t idx = 0;
   idx = bind_values_to_descriptor_set(
@@ -48,7 +62,38 @@ void ExecuteNode::encode(ComputeGraph* graph) {
   bind_params_to_descriptor_set(params_, descriptor_set, idx);

   context->register_shader_dispatch(
-      descriptor_set, pipeline_barrier, shader_, global_workgroup_size_);
+      descriptor_set, pipeline_barrier, shader_, *global_workgroup_size_);
+}
+
+void ExecuteNode::encode_copy(ComputeGraph* graph) {
+  api::Context* const context = graph->context();
+  api::PipelineBarrier pipeline_barrier{};
+
+  vTensorPtr src_v_t = graph->get_tensor(src_->refs[0]);
+  api::VulkanImage& src_image = src_v_t->image(
+      pipeline_barrier,
+      api::PipelineStage::COMPUTE, api::MemoryAccessType::READ);
+
+  vTensorPtr dst_v_t = graph->get_tensor(dst_->refs[0]);
+  api::VulkanImage& dst_image = dst_v_t->image(
+      pipeline_barrier,
+      api::PipelineStage::COMPUTE, api::MemoryAccessType::WRITE);
+
+  context->register_copy(
+      pipeline_barrier,
+      src_image,
+      dst_image,
+      *copy_range_,
+      *src_offset_,
+      *dst_offset_);
+}
+
+void ExecuteNode::encode(ComputeGraph* graph) {
+  if (shader_.src_code.size > 0) {
+    return encode_shader(graph);
+  } else {
+    return encode_copy(graph);
+  }
 }

 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/ops/ExecuteNode.h b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
index b63273023ed..b9197411571 100644
--- a/backends/vulkan/runtime/graph/ops/ExecuteNode.h
+++ b/backends/vulkan/runtime/graph/ops/ExecuteNode.h
@@ -12,6 +12,8 @@

 #include <executorch/backends/vulkan/runtime/graph/containers/Value.h>

+#include <optional>
+
 namespace vkcompute {

 class ComputeGraph;
@@ -24,6 +26,8 @@ struct ArgGroup {
   ArgGroup(const ValueRef ref, const api::MemoryAccessType access)
       : refs{ref}, access(access) {}

+  ArgGroup(const ArgGroup& ag) : refs(ag.refs), access(ag.access) {}
+
   ArgGroup(
       const std::vector<ValueRef>& refs,
       const api::MemoryAccessType access)
@@ -58,6 +62,14 @@ class ExecuteNode final {
       const ResizeFunction& resize_fn = nullptr,
       const std::vector<ValueRef>& resize_args = {});

+  ExecuteNode(
+      ComputeGraph& graph,
+      const ArgGroup& src,
+      const ArgGroup& dst,
+      const api::utils::uvec3& copy_range,
+      const api::utils::uvec3& src_offset,
+      const api::utils::uvec3& dst_offset);
+
   ~ExecuteNode() = default;

   void encode(ComputeGraph* graph);
@@ -70,12 +82,22 @@

  protected:
   const api::ShaderInfo shader_;
-  const api::utils::uvec3 global_workgroup_size_;
-  const api::utils::uvec3 local_workgroup_size_;
+  const std::optional<api::utils::uvec3> global_workgroup_size_;
+  const std::optional<api::utils::uvec3> local_workgroup_size_;
   const std::vector<ArgGroup> args_;
   std::vector<std::shared_ptr<api::UniformParamsBuffer>> params_;
   const ResizeFunction resize_fn_;
   const std::vector<ValueRef> resize_args_;
+
+  const std::optional<ArgGroup> src_;
+  const std::optional<ArgGroup> dst_;
+  const std::optional<api::utils::uvec3> copy_range_;
+  const std::optional<api::utils::uvec3> src_offset_;
+  const std::optional<api::utils::uvec3> dst_offset_;
+
+ private:
+  void encode_shader(ComputeGraph* graph);
+  void encode_copy(ComputeGraph* graph);
 };

 } // namespace vkcompute
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 271977c1450..e3129b2366c 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -445,6 +445,60 @@ TEST_F(VulkanComputeAPITest, texture_virtual_resize) {
   }
 }

+TEST_F(VulkanComputeAPITest, copy_test) {
+  std::vector<int64_t> sizes = {1, 3, 4}; // c, h, w
+  vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  api::MemoryAllocation a_mem = allocate_memory_for(a);
+  a.image().bind_allocation(a_mem);
+
+  std::vector<float> data_a(a.gpu_numel());
+  std::iota(data_a.begin(), data_a.end(), 0.0);
+  fill_vtensor(a, data_a);
+
+  vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false);
+  api::MemoryAllocation b_mem = allocate_memory_for(b);
+  b.image().bind_allocation(b_mem);
+
+  // Force clear memory
+  fill_vtensor(b, 0.0);
+
+  auto context = api::context();
+
+  // These operate on texture coordinates in (x, y, z), corresponding to
+  // (w, h, packed_c).
+  api::utils::uvec3 copy_range{2, 2, 1};
+  api::utils::uvec3 src_offset{0, 0, 0};
+  api::utils::uvec3 dst_offset{2, 1, 0};
+
+  vkcompute::api::PipelineBarrier pipeline_barrier{};
+
+  context->register_copy(
+      pipeline_barrier,
+      a.image(),
+      b.image(),
+      copy_range,
+      src_offset,
+      dst_offset
+  );
+
+  submit_to_gpu();
+
+  // Fetch the result back.
+  std::vector<float> data_out(b.gpu_numel());
+  extract_vtensor(b, data_out);
+
+  // w shifted by 2, h shifted by 1.
+  std::vector<float> expected{
+      0, 0, 0, 0,
+      0, 0, 0, 1,
+      0, 0, 4, 5,
+  };
+
+  for (size_t i = 0; i < expected.size(); i++) {
+    CHECK_VALUE(data_out, i, expected[i]);
+  }
+}
+
 //
 // Compute Graph Tests
 //
@@ -793,6 +847,57 @@ TEST(VulkanComputeGraphTest, test_large_graph) {
   }
 }

+
+TEST(VulkanComputeGraphTest, test_register_copy) {
+  GraphConfig config;
+  ComputeGraph graph(config);
+
+  std::vector<int64_t> size = {1, 3, 4};
+
+  auto memory_layout = api::GPUMemoryLayout::TENSOR_CHANNELS_PACKED;
+  IOValueRef a = graph.add_input_tensor(size, api::kFloat, memory_layout);
+
+  IOValueRef out = {};
+  out.value = graph.add_tensor(size, api::kFloat, memory_layout);
+
+  api::utils::uvec3 copy_range{2, 2, 1};
+  api::utils::uvec3 src_offset{0, 0, 0};
+  api::utils::uvec3 dst_offset{2, 1, 0};
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      graph,
+      {a.value, api::MemoryAccessType::READ},
+      {out.value, api::MemoryAccessType::WRITE},
+      copy_range,
+      src_offset,
+      dst_offset));
+
+  out.staging = graph.set_output_tensor(out.value);
+
+  graph.prepare();
+  graph.encode_execute();
+
+  // Texels outside the dst_offset + copy_range region are not written by the
+  // copy and are therefore undefined. Hence we fill the source with iota
+  // starting from 1.0, and treat 0.0 entries in the expected values as
+  // don't-cares.
+  fill_vtensor(graph, a, 1.0, /* iota = */ true);
+
+  graph.execute();
+  EXTRACT_TENSOR(out);
+
+  std::vector<float> expected{
+      0, 0, 0, 0,
+      0, 0, 1, 2,
+      0, 0, 5, 6,
+  };
+
+  for (size_t i = 0; i < expected.size(); i++) {
+    if (expected[i] > 0) {
+      CHECK_VALUE(data_out, i, expected[i]);
+    }
+  }
+}
+
 class VulkanToFromGPUShaderTest : public ::testing::Test {
  public:
   void SetUp() override {
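Note on the expected values in the two tests above: register_copy records a plain texel-region copy, so a destination texel (x, y) inside the copy window receives the source texel at (x - dst_offset.x + src_offset.x, y - dst_offset.y + src_offset.y), and texels outside the window keep their previous contents. The sketch below is a hypothetical CPU reference for the single-depth, channels-packed case used in the tests; reference_copy is not part of this patch or of the test utilities, it only illustrates where the expected vectors come from.

#include <cstddef>
#include <vector>

// Hypothetical CPU reference for a 2D texel-region copy on a texture that is
// W texels wide and stored row-major. Texels outside the copy window keep
// their old contents.
std::vector<float> reference_copy(
    const std::vector<float>& src,
    std::vector<float> dst,
    size_t W,
    size_t copy_w,
    size_t copy_h,
    size_t src_x,
    size_t src_y,
    size_t dst_x,
    size_t dst_y) {
  for (size_t y = 0; y < copy_h; ++y) {
    for (size_t x = 0; x < copy_w; ++x) {
      dst[(dst_y + y) * W + (dst_x + x)] = src[(src_y + y) * W + (src_x + x)];
    }
  }
  return dst;
}

// With W = 4, copy_range = {2, 2}, src_offset = {0, 0}, dst_offset = {2, 1},
// a zero-filled dst, and src filled with iota starting at 0, this reproduces
// copy_test's expected vector {0,0,0,0, 0,0,0,1, 0,0,4,5}; starting the iota
// at 1 instead reproduces test_register_copy's {0,0,0,0, 0,0,1,2, 0,0,5,6}.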