diff --git a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
index 08363fa71e4..d1eaba9a551 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Cat.cpp
@@ -31,10 +31,10 @@ void add_cat_default_node(
   int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
   vTensorPtr t_out = graph.get_tensor(out);
-  NchwDim nchw_dim = normalize_to_nchw_dim(*t_out, dim);
+  Dim4DType dim4d = normalize_to_dim4d(*t_out, dim);
   // TODO: Find ways to factor out the similar code for width, height, and batch
-  if (nchw_dim == DimWidth) {
+  if (dim4d == DIM4D_WIDTH) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -46,7 +46,7 @@ void add_cat_default_node(
       dst_offset.data[0] += range.data[0];
     }
-  } else if (nchw_dim == DimHeight) {
+  } else if (dim4d == DIM4D_HEIGHT) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -57,7 +57,7 @@ void add_cat_default_node(
           graph, input_ref, range, src_offset, dst_offset, out);
       dst_offset.data[1] += range.data[1];
     }
-  } else if (nchw_dim == DimBatch) {
+  } else if (dim4d == DIM4D_BATCH) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -68,19 +68,19 @@ void add_cat_default_node(
           graph, input_ref, range, src_offset, dst_offset, out);
       dst_offset.data[2] += range.data[2];
     }
-  } else if (nchw_dim == DimChannel) {
+  } else if (dim4d == DIM4D_CHANNEL) {
     int32_t src_offset = 0;
     int32_t dst_offset = 0;
     for (ValueRef input_ref : *input_list) {
       vTensorPtr t_in = graph.get_tensor(input_ref);
-      int32_t range = dim_at<Dim4D::Channel>(t_in->sizes());
+      int32_t range = dim_at(t_in->sizes(), DIM4D_CHANNEL);
       add_copy_channel_offset_node(
           graph, input_ref, range, src_offset, dst_offset, out);
       dst_offset += range;
     }
   } else {
-    VK_THROW("Unexpected value of nchw_dim=", nchw_dim);
+    VK_THROW("Unexpected value of dim4d=", dim4d);
   }
 }
diff --git a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
index 5ca4973e56f..8a670283c9b 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Copy.cpp
@@ -92,23 +92,23 @@ void add_copy_channel_offset_node(
   VK_CHECK_COND(t_out->dim() >= 3, "Dst dim should be at least 3");
   VK_CHECK_COND(
-      dim_at<Dim4D::Channel>(in_sizes) >= src_channel_offset + channel_range,
+      dim_at<DIM4D_CHANNEL>(in_sizes) >= src_channel_offset + channel_range,
       "Src channel (",
       src_channel_offset,
       ") and range (",
       channel_range,
       ") should be less than or equal to input tensor's channel size (",
-      dim_at<Dim4D::Channel>(in_sizes),
+      dim_at<DIM4D_CHANNEL>(in_sizes),
       ")");
   VK_CHECK_COND(
-      dim_at<Dim4D::Channel>(out_sizes) >= dst_channel_offset + channel_range,
+      dim_at<DIM4D_CHANNEL>(out_sizes) >= dst_channel_offset + channel_range,
       "Dst channel (",
       dst_channel_offset,
       ") and range (",
       channel_range,
       ") should be less than or equal to input tensor's channel size (",
-      dim_at<Dim4D::Channel>(out_sizes),
+      dim_at<DIM4D_CHANNEL>(out_sizes),
       ")");
   VK_CHECK_COND(channel_range >= 0, "Channel range must be non-negative");
@@ -121,10 +121,10 @@ void add_copy_channel_offset_node(
   kernel_name.reserve(kShaderNameReserve);
   add_dtype_suffix(kernel_name, *t_out);
-  int32_t out_channels = dim_at<Dim4D::Channel>(out_sizes);
+  int32_t out_channels = dim_at<DIM4D_CHANNEL>(out_sizes);
   // Copy one batch at a time.
-  for (int batch_idx = 0; batch_idx < dim_at<Dim4D::Batch>(in_sizes);
+  for (int batch_idx = 0; batch_idx < dim_at<DIM4D_BATCH>(in_sizes);
        batch_idx++) {
     // Mapping the tensor NCHW coordinates into texture XYZ coordinates
     int32_t dst_first_z = dst_channel_offset / 4;
@@ -139,8 +139,8 @@ void add_copy_channel_offset_node(
         0, 0, dst_first_z + batch_idx * api::utils::div_up(out_channels, 4)};
     uvec3 global_size{
-        dim_at<Dim4D::Width>(in_sizes),
-        dim_at<Dim4D::Height>(in_sizes),
+        dim_at<DIM4D_WIDTH>(in_sizes),
+        dim_at<DIM4D_HEIGHT>(in_sizes),
         api::utils::safe_downcast<uint32_t>(dst_last_z - dst_first_z + 1)};
     uvec3 local_size = adaptive_work_group_size(global_size);
diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
index 14b77e3b451..e5c78ada03b 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
@@ -70,8 +70,8 @@ void add_permute_node(
   kernel_name.reserve(kShaderNameReserve);
   add_dtype_suffix(kernel_name, *t_out);
-  uint32_t out_channels = dim_at<Dim4D::Channel>(t_out->sizes());
-  uint32_t in_channels = dim_at<Dim4D::Channel>(t_in->sizes());
+  uint32_t out_channels = dim_at<DIM4D_CHANNEL>(t_out->sizes());
+  uint32_t in_channels = dim_at<DIM4D_CHANNEL>(t_in->sizes());
   uint32_t out_c_aligned = api::utils::align_up(out_channels, 4u);
   uint32_t in_c_aligned = api::utils::align_up(in_channels, 4u);
diff --git a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
index dedc7978ada..636765888b5 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Repeat.cpp
@@ -32,23 +32,23 @@ void check_args(
       "Input tensor dim size must be not greater than the repeat argument's size");
   VK_CHECK_COND(
-      dim_at<Dim4D::Width>(in.sizes()) * dim_at<Dim4D::Width>(repeats) ==
-          dim_at<Dim4D::Width>(out.sizes()),
+      dim_at<DIM4D_WIDTH>(in.sizes()) * dim_at<DIM4D_WIDTH>(repeats) ==
+          dim_at<DIM4D_WIDTH>(out.sizes()),
       "Output's width doesn't match input's width * repeat count");
   VK_CHECK_COND(
-      dim_at<Dim4D::Height>(in.sizes()) * dim_at<Dim4D::Height>(repeats) ==
-          dim_at<Dim4D::Height>(out.sizes()),
+      dim_at<DIM4D_HEIGHT>(in.sizes()) * dim_at<DIM4D_HEIGHT>(repeats) ==
+          dim_at<DIM4D_HEIGHT>(out.sizes()),
       "Output's height doesn't match input's height * repeat count");
   VK_CHECK_COND(
-      dim_at<Dim4D::Channel>(in.sizes()) * dim_at<Dim4D::Channel>(repeats) ==
-          dim_at<Dim4D::Channel>(out.sizes()),
+      dim_at<DIM4D_CHANNEL>(in.sizes()) * dim_at<DIM4D_CHANNEL>(repeats) ==
+          dim_at<DIM4D_CHANNEL>(out.sizes()),
       "Output's channel doesn't match input's channel * repeat count");
   VK_CHECK_COND(
-      dim_at<Dim4D::Batch>(in.sizes()) * dim_at<Dim4D::Batch>(repeats) ==
-          dim_at<Dim4D::Batch>(out.sizes()),
+      dim_at<DIM4D_BATCH>(in.sizes()) * dim_at<DIM4D_BATCH>(repeats) ==
+          dim_at<DIM4D_BATCH>(out.sizes()),
       "Output's batch doesn't match input's batch * repeat count");
 }
@@ -70,13 +70,13 @@ void add_repeat_channel_node(
   const std::vector<int64_t>& in_sizes = t_in->sizes();
   int32_t in_width =
-      api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Width>(in_sizes));
+      api::utils::safe_downcast<int32_t>(dim_at<DIM4D_WIDTH>(in_sizes));
   int32_t in_height =
-      api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Height>(in_sizes));
+      api::utils::safe_downcast<int32_t>(dim_at<DIM4D_HEIGHT>(in_sizes));
   int32_t in_channel =
-      api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Channel>(in_sizes));
+      api::utils::safe_downcast<int32_t>(dim_at<DIM4D_CHANNEL>(in_sizes));
   int32_t in_batch =
-      api::utils::safe_downcast<int32_t>(dim_at<Dim4D::Batch>(in_sizes));
+      api::utils::safe_downcast<int32_t>(dim_at<DIM4D_BATCH>(in_sizes));
   int32_t out_channel = repeat_channel * in_channel;
@@ -142,7 +142,7 @@ void add_repeat_node(
   // dimension, we copy over the input texure to the output. In subsequent
   // dimensions, we read and write from the same tensor.
-  if (int64_t channel_repeat = dim_at<Dim4D::Channel>(repeats);
+  if (int64_t channel_repeat = dim_at<DIM4D_CHANNEL>(repeats);
       channel_repeat == 1) {
     // If no repeat, short-cut to a direct copy
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -156,12 +156,12 @@ void add_repeat_node(
   // TODO: refactor width, height, and batch into a common helper function.
   // Width
-  if (int64_t width_repeat = dim_at<Dim4D::Width>(repeats); width_repeat > 1) {
+  if (int64_t width_repeat = dim_at<DIM4D_WIDTH>(repeats); width_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     for (int i = 1; i < width_repeat; ++i) {
       api::utils::ivec3 dst_offset = api::utils::make_ivec3(
-          {i * dim_at<Dim4D::Width>(in_sizes), 0, 0}, false);
+          {i * dim_at<DIM4D_WIDTH>(in_sizes), 0, 0}, false);
       add_copy_offset_node(
           graph, out, running_range, src_offset, dst_offset, out);
@@ -171,13 +171,13 @@ void add_repeat_node(
   }
   // Height
-  if (int64_t height_repeat = dim_at<Dim4D::Height>(repeats);
+  if (int64_t height_repeat = dim_at<DIM4D_HEIGHT>(repeats);
       height_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     for (int i = 1; i < height_repeat; ++i) {
       api::utils::ivec3 dst_offset = api::utils::make_ivec3(
-          {0, i * dim_at<Dim4D::Height>(in_sizes), 0}, false);
+          {0, i * dim_at<DIM4D_HEIGHT>(in_sizes), 0}, false);
       add_copy_offset_node(
           graph, out, running_range, src_offset, dst_offset, out);
@@ -187,7 +187,7 @@ void add_repeat_node(
   }
   // Batch
-  if (int64_t batch_repeat = dim_at<Dim4D::Batch>(repeats); batch_repeat > 1) {
+  if (int64_t batch_repeat = dim_at<DIM4D_BATCH>(repeats); batch_repeat > 1) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     for (int i = 1; i < batch_repeat; ++i) {
diff --git a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
index bceec27baee..8cab1505505 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Slice.cpp
@@ -43,8 +43,7 @@ void add_slice_tensor_out_node(
   dim = normalize(dim, t_in->dim());
-  // Create a dim value as in the underlying dim is 4-dimension.
-  int64_t nchw_dim = dim + (4 - t_in->dim());
+  Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
   std::optional<int64_t> opt_start =
       graph.extract_optional_scalar<int64_t>(opt_start_ref);
@@ -61,7 +60,7 @@ void add_slice_tensor_out_node(
   VK_CHECK_COND((0 <= start) && (start < in_sizes[dim]));
   VK_CHECK_COND((0 <= end) && (end <= in_sizes[dim]));
-  if (nchw_dim == 1) {
+  if (dim4d == DIM4D_CHANNEL) {
     // slice by channel
     std::string kernel_name = "slice_channel";
     kernel_name.reserve(kShaderNameReserve);
@@ -93,17 +92,17 @@ void add_slice_tensor_out_node(
     // GPU's coordinate is in x, y, z
     int64_t gpu_dim = -1;
     int64_t stride = 1;
-    if (nchw_dim == 3) {
+    if (dim4d == DIM4D_WIDTH) {
       gpu_dim = 0; // width: x dimension in gpu
       VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step));
-    } else if (nchw_dim == 2) {
+    } else if (dim4d == DIM4D_HEIGHT) {
       gpu_dim = 1; // height: y dimension
       VK_CHECK_COND(out_sizes[dim] == (1 + (end - start - 1) / step));
-    } else if (nchw_dim == 0) {
+    } else if (dim4d == DIM4D_BATCH) {
       gpu_dim = 2; // batch: z dimension
       // Due to channel packing, each batch value is span over stride planes
-      int64_t n_channels = dim_at<Dim4D::Channel>(in_sizes);
+      int64_t n_channels = dim_at(in_sizes, DIM4D_CHANNEL);
       stride = api::utils::div_up(n_channels, 4ll);
     } else {
       VK_THROW("Unexpected ncwh_dim!");
diff --git a/backends/vulkan/runtime/graph/ops/impl/Split.cpp b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
index 2d218f722a2..9347ea7a411 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Split.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Split.cpp
@@ -29,7 +29,7 @@ void add_split_with_sizes_default_node(
   ValueListPtr out_list = graph.get_value_list(out_list_ref);
-  NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
+  Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
   VK_CHECK_COND(out_list->size() == split_sizes.size());
@@ -39,10 +39,10 @@ void add_split_with_sizes_default_node(
     vTensorPtr t_out = graph.get_tensor(out_ref);
     VK_CHECK_COND(check_memory_layout_is(*t_out, api::kChannelsPacked));
-    VK_CHECK_COND(dim_at(*t_out, nchw_dim) == split_size);
+    VK_CHECK_COND(dim_at(*t_out, dim4d) == split_size);
   }
-  if (nchw_dim == DimWidth) {
+  if (dim4d == DIM4D_WIDTH) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -55,7 +55,7 @@ void add_split_with_sizes_default_node(
       src_offset.data[0] += range.data[0];
     }
-  } else if (nchw_dim == DimHeight) {
+  } else if (dim4d == DIM4D_HEIGHT) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -66,7 +66,7 @@ void add_split_with_sizes_default_node(
       src_offset.data[1] += range.data[1];
     }
-  } else if (nchw_dim == DimBatch) {
+  } else if (dim4d == DIM4D_BATCH) {
     api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
     api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);
@@ -77,13 +77,13 @@ void add_split_with_sizes_default_node(
       src_offset.data[2] += range.data[2];
     }
-  } else if (nchw_dim == DimChannel) {
+  } else if (dim4d == DIM4D_CHANNEL) {
     int32_t src_offset = 0;
     int32_t dst_offset = 0;
     for (ValueRef out_ref : *out_list) {
       vTensorPtr t_out = graph.get_tensor(out_ref);
-      int32_t range = dim_at<Dim4D::Channel>(t_out->sizes());
+      int32_t range = dim_at<DIM4D_CHANNEL>(t_out->sizes());
       add_copy_channel_offset_node(
           graph, in, range, src_offset, dst_offset, out_ref);
       src_offset += range;
@@ -122,8 +122,8 @@ void add_split_tensor_node(
   int64_t dim =
       graph.extract_scalar<int64_t>(dim_ref);
   vTensorPtr t_in = graph.get_tensor(in);
-  NchwDim nchw_dim = normalize_to_nchw_dim(*t_in, dim);
-  int64_t size = dim_at(*t_in, nchw_dim);
+  Dim4DType dim4d = normalize_to_dim4d(*t_in, dim);
+  int64_t size = dim_at(*t_in, dim4d);
   std::vector<int64_t> split_sizes(size / split_size, split_size);
   add_split_with_sizes_default_node(graph, in, split_sizes, dim, out);
diff --git a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
index c5a47b7776a..52d8f1f3bfe 100644
--- a/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
+++ b/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h
@@ -12,27 +12,6 @@ namespace vkcompute {
-// A canonical way to represent dimensions as enum. Motivation behind a
-// canonical enum is that in the user tensor, it is using a "big-endian"-ish
-// mechanism to reference a dimension in a nchw-tensor, leading to tensor of
-// different dimension have different mapping from dim to the underlying texture
-// dimension. For instasnce, for a 2d (height x width) tensors, dim 0 refers to
-// height and dim 1 refers to width; for a 4d (batch x channel x height x width)
-// tensor, dim 0 refers to batch and dim 1 refers to channel. Using this
-// canonical enum allows us to bring clarity in code.
-
-enum NchwDim : uint32_t {
-  DimWidth = 1u,
-  DimHeight = 2u,
-  DimChannel = 3u,
-  DimBatch = 4u,
-};
-
-// Convert a dim provided by user into canonical enum.
-inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
-  return static_cast<NchwDim>(v_in.dim() - dim);
-}
-
 /*
  * Maps a semantic dimension name to an integer that
  * corresponds to its innermost ordering in a 4D tensor in
@@ -40,13 +19,17 @@ inline NchwDim normalize_to_nchw_dim(const vTensor& v_in, int32_t dim) {
  * corresponds to 1, height is the next innermost, so it
  * corresponds to 2, and so on.
  */
-struct Dim4D {
-  static constexpr uint32_t Width = DimWidth;
-  static constexpr uint32_t Height = DimHeight;
-  static constexpr uint32_t Channel = DimChannel;
-  static constexpr uint32_t Batch = DimBatch;
+enum Dim4DType : uint32_t {
+  DIM4D_WIDTH = 1u,
+  DIM4D_HEIGHT = 2u,
+  DIM4D_CHANNEL = 3u,
+  DIM4D_BATCH = 4u,
 };
+inline Dim4DType normalize_to_dim4d(const vTensor& v_in, int32_t dim) {
+  return static_cast<Dim4DType>(v_in.dim() - dim);
+}
+
 /*
  * Semantic dimension names for a 1D tensor
 */
@@ -86,11 +69,11 @@ uint32_t dim_at(const std::vector<int64_t>& sizes) {
   return dims < N ? 1 : api::utils::safe_downcast<uint32_t>(sizes[dims - N]);
 }
-inline uint32_t dim_at(const std::vector<int64_t>& sizes, NchwDim nchw_dim) {
+inline uint32_t dim_at(const std::vector<int64_t>& sizes, Dim4DType dim4d) {
   const uint32_t dims = sizes.size();
-  return dims < nchw_dim
+  return dims < dim4d
       ? 1
-      : api::utils::safe_downcast<uint32_t>(sizes[dims - nchw_dim]);
+      : api::utils::safe_downcast<uint32_t>(sizes[dims - dim4d]);
 }
 template <uint32_t N>
@@ -98,23 +81,23 @@ uint32_t dim_at(const vTensor& v_in) {
   return dim_at<N>(v_in.sizes());
 }
-inline uint32_t dim_at(const vTensor& v_in, NchwDim nchw_dim) {
-  return dim_at(v_in.sizes(), nchw_dim);
+inline uint32_t dim_at(const vTensor& v_in, Dim4DType dim4d) {
+  return dim_at(v_in.sizes(), dim4d);
 }
-inline std::ostream& operator<<(std::ostream& os, NchwDim nchw_dim) {
-  switch (nchw_dim) {
-    case DimWidth:
-      os << "DimWidth";
+inline std::ostream& operator<<(std::ostream& os, Dim4DType dim4d) {
+  switch (dim4d) {
+    case DIM4D_WIDTH:
+      os << "DIM4D_WIDTH";
       break;
-    case DimHeight:
-      os << "DimHeight";
+    case DIM4D_HEIGHT:
+      os << "DIM4D_HEIGHT";
       break;
-    case DimChannel:
-      os << "DimChannel";
+    case DIM4D_CHANNEL:
+      os << "DIM4D_CHANNEL";
       break;
-    case DimBatch:
-      os << "DimBatch";
+    case DIM4D_BATCH:
+      os << "DIM4D_BATCH";
       break;
     default:
       os << "DimUnknown";
@@ -122,5 +105,4 @@ inline std::ostream& operator<<(std::ostream& os, NchwDim nchw_dim) {
   }
   return os;
 }
-
 } // namespace vkcompute
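Reviewer note (illustrative sketch, not part of the patch): the snippet below shows the dim mapping that normalize_to_dim4d() performs, i.e. a user-facing dim index is turned into an offset from the innermost (width) axis, so the same Dim4DType value names the same semantic axis regardless of tensor rank. The enum is re-declared locally, and the rank-taking helper plus the main() driver are assumptions added only so the example compiles outside the Vulkan graph runtime.

// Standalone illustration of the normalize_to_dim4d() mapping.
#include <cstdint>
#include <iostream>

enum Dim4DType : uint32_t {
  DIM4D_WIDTH = 1u,
  DIM4D_HEIGHT = 2u,
  DIM4D_CHANNEL = 3u,
  DIM4D_BATCH = 4u,
};

// Mirrors normalize_to_dim4d(), but takes the tensor rank directly instead of
// a vTensor (hypothetical helper for this example only).
inline Dim4DType normalize_to_dim4d(int64_t rank, int64_t dim) {
  return static_cast<Dim4DType>(rank - dim);
}

int main() {
  // 4D NCHW tensor: dim 1 is the channel axis -> 4 - 1 = 3 = DIM4D_CHANNEL.
  std::cout << normalize_to_dim4d(4, 1) << "\n"; // prints 3
  // 2D (height x width) tensor: dim 0 is height -> 2 - 0 = 2 = DIM4D_HEIGHT.
  std::cout << normalize_to_dim4d(2, 0) << "\n"; // prints 2
  return 0;
}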