diff --git a/backends/vulkan/runtime/graph/ops/glsl/clone.glsl b/backends/vulkan/runtime/graph/ops/glsl/clone.glsl
new file mode 100644
index 00000000000..64def8d7000
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/clone.glsl
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+layout(std430) buffer;
+
+layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
+layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; // source texture, read with texelFetch in main()
+
+layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
+  ivec3 out_limits; // exclusive upper bound on pos; see the guard in main()
+};
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; // local size comes from specialization constants 0, 1, 2
+
+void main() { // copies one texel of image_in to the same coordinate of image_out
+  ivec3 pos = ivec3(gl_GlobalInvocationID);
+  if (any(greaterThanEqual(pos, out_limits))) { // exit if any component of pos reaches its limit
+    return;
+  }
+  imageStore(image_out, pos, texelFetch(image_in, pos, 0)); // fetch at lod 0, store unchanged
+}
diff --git a/backends/vulkan/runtime/graph/ops/glsl/clone.yaml b/backends/vulkan/runtime/graph/ops/glsl/clone.yaml
new file mode 100644
index 00000000000..5dbce0e9d8d
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/clone.yaml
@@ -0,0 +1,10 @@
+clone:
+  parameter_names_with_default_values:
+    DTYPE: float # clone.glsl indexes IMAGE_FORMAT and IMAGE_T with DTYPE
+    NDIM: 3 # clone.glsl indexes IMAGE_T with NDIM
+  generate_variant_forall:
+    DTYPE: # presumably one shader variant is generated per listed value - confirm
+      - VALUE: half
+      - VALUE: float
+  shader_variants:
+    - NAME: clone # Clone.cpp builds its kernel name from "clone" plus a dtype suffix
diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
new file mode 100644
index 00000000000..e95e7bdc00d
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include // NOTE(review): the #include targets are missing from this copy of the patch - restore them
+
+#include
+
+#include
+#include
+#include
+
+namespace vkcompute {
+// Appends an ExecuteNode that copies `in` into `out` using the "clone" shader.
+void add_clone_node(
+    ComputeGraph& graph, // graph whose execute_nodes() list receives the node
+    const ValueRef in, // source value, bound with READ access
+    const ValueRef out) { // destination value, bound with WRITE access
+  vTensorPtr t_out = graph.get_tensor(out);
+
+  std::string kernel_name = "clone"; // a dtype suffix is appended below
+  add_dtype_suffix(kernel_name, *t_out);
+
+  api::utils::uvec3 global_size = t_out->extents(); // taken from the output tensor
+  api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      graph,
+      VK_KERNEL_FROM_STR(kernel_name),
+      global_size,
+      local_size,
+      {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}},
+      {t_out->texture_limits_ubo()})); // presumably OutLimits in clone.glsl - confirm
+}
+
+void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  // The vulkan delegate does not support changing memory format.
+  return add_clone_node(graph, args[0], args[2]); // args[1] is unused
+}
+
+// Clone node is not the most efficient implementation for the aten.clone
+// operation. A more efficient implementation can be achieved during vulkan
+// export with the use of shared object. This clone node is introduced to enable
+// a "copy" mechanism if there is no alternative (e.g. during direct
+// ComputeGraph manipulation, we need to make a copy of a Tensor).
+
+REGISTER_OPERATORS {
+  VK_REGISTER_OP(aten.clone.default, clone); // presumably maps this op name to clone() - confirm against the macro
+}
+
+} // namespace vkcompute
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
index 9c4ed7dacd7..8101c1d6fe2 100644
--- a/backends/vulkan/test/op_tests/cases.py
+++ b/backends/vulkan/test/op_tests/cases.py
@@ -361,6 +361,25 @@ def get_unsqueeze_inputs():
     return test_suite
 
 
+def get_clone_inputs():  # cases registered below under "aten.clone.default"
+    test_suite = VkTestSuite(
+        [
+            ((S2, S1, S2, S1),),  # each case is a 1-tuple; presumably the input tensor's sizes - confirm
+            ((S2, S1, S2),),
+            ((S2, S1),),
+            ((S2,),),
+            ((XS, S1, XS, S1),),
+            ((XS, S1, XS),),
+            ((S1, XS, S1),),
+            ((XS, S1),),
+            ((S1, XS),),
+            ((S1,),),
+            ((XS,),),
+        ]
+    )
+    return test_suite
+
+
 test_suites = {
     "aten.add.Tensor": get_binary_elementwise_inputs(),
     "aten.sub.Tensor": get_binary_elementwise_inputs(),
@@ -378,4 +397,5 @@ def get_unsqueeze_inputs():
     "aten.view_copy.default": get_view_inputs(),
     "aten.slice_copy.Tensor": get_slice_inputs(),
     "aten.unsqueeze_copy.default": get_unsqueeze_inputs(),
+    "aten.clone.default": get_clone_inputs(),  # same op name as VK_REGISTER_OP in Clone.cpp
 }
diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py
index b1c08e6d0d8..1e28519ebfb 100644
--- a/backends/vulkan/test/op_tests/utils/codegen.py
+++ b/backends/vulkan/test/op_tests/utils/codegen.py
@@ -21,7 +21,8 @@
     OPT_DEVICE,
     OPT_INT64,
     OPT_LAYOUT,
-    OPT_SCALARTYPE,
+    OPT_MEMORY_FORMAT,
+    OPT_SCALAR_TYPE,  # renamed from OPT_SCALARTYPE
     TestSuite,
     TestSuiteGen,
     THREE_TENSOR_TUPLE,
@@ -250,10 +251,11 @@ def create_value_for(self, ref: ValueRefList) -> str: # noqa: C901
         elif ref.src_cpp_type == DOUBLE:
             ret_str += f"add_scalar({ref.src_cpp_name}); \n"
         elif (
-            ref.src_cpp_type == OPT_SCALARTYPE
+            ref.src_cpp_type == OPT_SCALAR_TYPE
             or ref.src_cpp_type == OPT_LAYOUT
             or ref.src_cpp_type == OPT_DEVICE
             or ref.src_cpp_type == OPT_BOOL
+            or ref.src_cpp_type == OPT_MEMORY_FORMAT  # emitted as add_none(), like the other types in this branch
         ):
             ret_str += "add_none(); \n"
         elif ref.src_cpp_type == TWO_TENSOR_TUPLE:
diff --git a/backends/vulkan/test/op_tests/utils/codegen_base.py b/backends/vulkan/test/op_tests/utils/codegen_base.py
index 986526fbdcc..d5feada1df8 100644
--- a/backends/vulkan/test/op_tests/utils/codegen_base.py
+++ b/backends/vulkan/test/op_tests/utils/codegen_base.py
@@ -25,7 +25,8 @@
 OPT_INT64 = "::std::optional"
 OPT_DEVICE = "::std::optional"
 OPT_LAYOUT = "::std::optional"
-OPT_SCALARTYPE = "::std::optional"
+OPT_MEMORY_FORMAT = "::std::optional"  # NOTE(review): template argument looks stripped in this copy - confirm
+OPT_SCALAR_TYPE = "::std::optional"  # renamed from OPT_SCALARTYPE; same NOTE(review) applies
 TWO_TENSOR_TUPLE = "::std::tuple"
 THREE_TENSOR_TUPLE = "::std::tuple"
 
@@ -149,10 +150,11 @@ def create_input_data(self, arg: Argument, data: Any) -> str: # noqa: C901
             else:
                 ret_str += f"{str(data)};"
         elif (
-            cpp_type == OPT_SCALARTYPE
+            cpp_type == OPT_SCALAR_TYPE
             or cpp_type == OPT_LAYOUT
             or cpp_type == OPT_DEVICE
             or cpp_type == OPT_BOOL
+            or cpp_type == OPT_MEMORY_FORMAT  # generated as std::nullopt, like the other types in this branch
         ):
             ret_str += "std::nullopt;"
         else: