diff --git a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
index e95e7bdc00d..a8ebcf023d1 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Clone.cpp
@@ -42,6 +42,20 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
   return add_clone_node(graph, args[0], args[2]);
 }
 
+void contiguous(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  // The vulkan delegate does not support changing memory format.
+  return add_clone_node(graph, args[0], args[2]);
+}
+
+void _to_copy(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  // All arguments are ignored for the time being.
+  // _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None,
+  //     Device? device=None, bool? pin_memory=None, bool non_blocking=False,
+  //     MemoryFormat? memory_format=None) -> Tensor
+
+  return add_clone_node(graph, args[0], args[7]);
+}
+
 // Clone node is not the most efficient implementation for the aten.clone
 // operation. A more efficient implementation can be achieved during vulkan
 // export with the use of shared object. This clone node is introduced to enable
@@ -50,6 +64,8 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
 
 REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.clone.default, clone);
+  VK_REGISTER_OP(aten.contiguous.default, contiguous);
+  VK_REGISTER_OP(aten._to_copy.default, _to_copy);
 }
 
 } // namespace vkcompute
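The two new entry points deliberately ignore every optional argument: the Vulkan delegate has no notion of strided memory formats, so aten.contiguous.default and aten._to_copy.default both reduce to the existing clone node (by the same convention as clone's `args[2]`, `args[7]` is the output reference that follows _to_copy's seven schema inputs). A minimal eager-mode sketch of the kind of pattern this unblocks; the module name and shapes are illustrative, not taken from this PR:

```python
import torch

class TransposeThenContiguous(torch.nn.Module):
    # A transpose followed by .contiguous() is the classic producer of
    # aten.contiguous.default in an exported graph; a copying .to() call
    # can show up as aten._to_copy.default. Under the Vulkan delegate
    # both now become plain clones, with memory_format dropped.
    def forward(self, x):
        return x.transpose(0, 1).contiguous()

y = TransposeThenContiguous()(torch.randn(2, 3))
assert y.is_contiguous() and y.shape == (3, 2)
```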
diff --git a/backends/vulkan/runtime/graph/ops/impl/Transpose.cpp b/backends/vulkan/runtime/graph/ops/impl/Transpose.cpp
new file mode 100644
index 00000000000..39829567869
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/impl/Transpose.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/Permute.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
+
+namespace vkcompute {
+
+void add_t_default_node(ComputeGraph& graph, ValueRef in, ValueRef out) {
+  vTensorPtr t_in = graph.get_tensor(in);
+
+  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
+
+  // TODO: Verify 0-dim tensor
+  VK_CHECK_COND(
+      (1 <= t_in->dim()) && (t_in->dim() <= 2),
+      "aten.t tensor must be 1d or 2d");
+
+  std::vector<int64_t> permute_dims;
+  if (t_in->dim() == 1) {
+    permute_dims.emplace_back(0);
+  } else {
+    permute_dims.emplace_back(1);
+    permute_dims.emplace_back(0);
+  }
+
+  add_permute_node(graph, in, permute_dims, out);
+}
+
+void t_default(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  add_t_default_node(graph, args[0], args[1]);
+}
+
+REGISTER_OPERATORS {
+  VK_REGISTER_OP(aten.t.default, t_default);
+}
+
+} // namespace vkcompute
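Rather than adding a dedicated shader, add_t_default_node lowers aten.t.default onto the existing permute implementation: an identity permute `[0]` for 1-D inputs and a `[1, 0]` permute for 2-D inputs, with all other ranks rejected. A quick eager-mode check of the equivalence the node relies on (shapes are arbitrary):

```python
import torch

# 2-D: t() is exactly a (1, 0) permute, which is what the node emits
# when t_in->dim() == 2.
x = torch.arange(6.0).reshape(2, 3)
assert torch.equal(x.t(), x.permute(1, 0))

# 1-D: t() is a no-op, matching the single-element permute [0].
v = torch.arange(4.0)
assert torch.equal(v.t(), v.permute(0))
```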
diff --git a/backends/vulkan/test/op_tests/cases.py b/backends/vulkan/test/op_tests/cases.py
index a326402cc39..5bb4e3cb50d 100644
--- a/backends/vulkan/test/op_tests/cases.py
+++ b/backends/vulkan/test/op_tests/cases.py
@@ -555,6 +555,25 @@ def get_split_tensor_inputs():
     return test_suite
 
 
+def get_t_inputs():
+    test_suite = VkTestSuite(
+        [
+            ((1, S1),),
+            ((S1, 1),),
+            ((S2, S2),),
+            ((S2, S1),),
+            ((S1, S2),),
+            ((S1,),),
+            ((1,),),
+        ]
+    )
+    test_suite.layouts = [
+        "api::kChannelsPacked",
+    ]
+    test_suite.data_gen = "make_seq_tensor"
+    return test_suite
+
+
 test_suites = {
     "aten.add.Tensor": get_binary_elementwise_inputs(),
     "aten.sub.Tensor": get_binary_elementwise_inputs(),
@@ -573,8 +592,11 @@ def get_split_tensor_inputs():
     "aten.slice_copy.Tensor": get_slice_inputs(),
     "aten.unsqueeze_copy.default": get_unsqueeze_inputs(),
     "aten.clone.default": get_clone_inputs(),
+    "aten.contiguous.default": get_clone_inputs(),
+    "aten._to_copy.default": get_clone_inputs(),
     "aten.repeat.default": get_repeat_inputs(),
     "aten.cat.default": get_cat_inputs(),
     "aten.split_with_sizes.default": get_split_with_sizes_inputs(),
     "aten.split.Tensor": get_split_tensor_inputs(),
+    "aten.t.default": get_t_inputs(),
 }
diff --git a/backends/vulkan/test/op_tests/utils/codegen.py b/backends/vulkan/test/op_tests/utils/codegen.py
index a43998b47c9..7f47a1e8e61 100644
--- a/backends/vulkan/test/op_tests/utils/codegen.py
+++ b/backends/vulkan/test/op_tests/utils/codegen.py
@@ -17,6 +17,7 @@
     CppTestFileGen,
     DOUBLE,
     INT,
+    MEMORY_FORMAT,
     OPT_AT_TENSOR,
     OPT_BOOL,
     OPT_DEVICE,
@@ -231,7 +232,7 @@ def create_aten_method_call(self) -> str:
         # at::_ops::{name}::call(*), and ATEN_FN is a handy macro.
         cpp_sig = gen_static_dispatch_backend_call_signature(self.f_sig, self.f)
         exprs = translate_args(self.f_sig, cpp_sig)
-        func_call = f"ATEN_FN({self.f_sig.name()})({exprs});"
+        func_call = f"ATEN_FN({self.f_sig.func.name})({exprs});"
         return func_call
 
     def create_out_src(self) -> str:
@@ -342,6 +343,7 @@ def create_value_for(self, ref: ValueRefList) -> str:  # noqa: C901
             or ref.src_cpp_type == OPT_DEVICE
             or ref.src_cpp_type == OPT_BOOL
             or ref.src_cpp_type == OPT_MEMORY_FORMAT
+            or ref.src_cpp_type == MEMORY_FORMAT
         ):
             ret_str += "add_none(); \n"
         elif ref.src_cpp_type == TWO_TENSOR_TUPLE:
diff --git a/backends/vulkan/test/op_tests/utils/codegen_base.py b/backends/vulkan/test/op_tests/utils/codegen_base.py
index 6dac97583c6..a4d8ff75f4b 100644
--- a/backends/vulkan/test/op_tests/utils/codegen_base.py
+++ b/backends/vulkan/test/op_tests/utils/codegen_base.py
@@ -22,6 +22,7 @@
 BOOL = "bool"
 DOUBLE = "double"
 INT = "int64_t"
+MEMORY_FORMAT = "at::MemoryFormat"
 OPT_AT_TENSOR = "::std::optional<at::Tensor>"
 OPT_BOOL = "::std::optional<bool>"
 OPT_INT64 = "::std::optional<int64_t>"
@@ -174,6 +175,8 @@ def create_input_data(self, arg: Argument, data: Any) -> str:  # noqa: C901
             or cpp_type == OPT_MEMORY_FORMAT
         ):
             ret_str += "std::nullopt;"
+        elif cpp_type == MEMORY_FORMAT:
+            ret_str += "at::MemoryFormat::Contiguous;"
         else:
             raise RuntimeError(f"Unsupported cpp type {cpp_type}")
         return ret_str + "\n"
@@ -267,6 +270,10 @@ def generate_suite_cpp(self) -> str:
     return at::from_blob(values.data(), sizes, at::kFloat).toType(dtype).detach().clone();
 }}
 
+
+// torchgen assumes the "at" namespace is used for function default arguments.
+using at::MemoryFormat;
+
 {test_suites_cpp}
 """
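The codegen changes support the new test entries: aten.contiguous.default takes a non-optional MemoryFormat in its ATen schema (defaulting to contiguous_format), a C++ type the test generator previously rejected with "Unsupported cpp type". The generated ATen reference call now pins it to Contiguous, while the Vulkan graph side still passes None via add_none(). A condensed sketch of the resulting mapping; the constant names match the diff, OPT_MEMORY_FORMAT's string value is assumed from the naming pattern, and the real create_input_data builds the full declaration and covers many more types:

```python
# Constants as declared in codegen_base.py after this change; the
# optional variant's string is an assumption, not quoted from the diff.
MEMORY_FORMAT = "at::MemoryFormat"
OPT_MEMORY_FORMAT = "::std::optional<at::MemoryFormat>"

def default_value(cpp_type: str) -> str:
    # An optional memory format stays unset; a required one is pinned to
    # Contiguous. The latter compiles only because the generated test
    # suite now emits `using at::MemoryFormat;` for torchgen's
    # namespace-less default expressions.
    if cpp_type == OPT_MEMORY_FORMAT:
        return "std::nullopt;"
    if cpp_type == MEMORY_FORMAT:
        return "at::MemoryFormat::Contiguous;"
    raise RuntimeError(f"Unsupported cpp type {cpp_type}")

print(default_value(MEMORY_FORMAT))  # at::MemoryFormat::Contiguous;
```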