diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp index 6cbba048528..402d35d75bb 100644 --- a/backends/vulkan/runtime/api/Tensor.cpp +++ b/backends/vulkan/runtime/api/Tensor.cpp @@ -139,8 +139,10 @@ vTensor::vTensor( // Calculate sizes and strides sizes_(sizes.begin(), sizes.end()), gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)}, - // Utility Uniform Buffer that can be passed to shaders as arguments - sizes_uniform_(context, api::utils::make_whcn_ivec4(sizes_)), + texture_limits_{{0, 0, 0}}, + // Utility Uniform Buffers that can be passed to shaders as arguments + sizes_uniform_(), + texture_limits_uniform_(), // Construct Tensor storage storage_( context, @@ -149,6 +151,13 @@ vTensor::vTensor( gpu_sizes_, dtype_, allocate_memory) { + if (storage_type != api::kBuffer) { + texture_limits_.limits = api::utils::ivec3{ + api::utils::safe_downcast(storage_.extents_.data[0]), + api::utils::safe_downcast(storage_.extents_.data[1]), + api::utils::safe_downcast(storage_.extents_.data[2])}; + } + if (dtype == api::kHalf) { VK_CHECK_COND( api::context()->adapter_ptr()->has_16bit_storage(), @@ -187,6 +196,22 @@ api::VulkanBuffer& vTensor::buffer( return storage_.buffer_; } +const api::BufferBindInfo vTensor::sizes_ubo() { + if (!sizes_uniform_.buffer()) { + sizes_uniform_ = api::UniformParamsBuffer( + storage_.context_, api::utils::make_whcn_ivec4(sizes_)); + } + return api::BufferBindInfo(sizes_uniform_.buffer()); +} + +const api::BufferBindInfo vTensor::texture_limits_ubo() { + if (!texture_limits_uniform_.buffer()) { + texture_limits_uniform_ = + api::UniformParamsBuffer(storage_.context_, texture_limits_); + } + return api::BufferBindInfo(texture_limits_uniform_.buffer()); +} + VmaAllocationCreateInfo vTensor::get_allocation_create_info() const { switch (storage_type()) { case api::kBuffer: @@ -224,7 +249,25 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) { void vTensor::update_size_metadata(const std::vector& new_sizes) { sizes_ = new_sizes; gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type()); - sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_)); + + if (storage_type() != api::kBuffer) { + // Calculate the extents of the image texture that would have been required + // for a tensor of the new sizes. + api::utils::uvec3 virtual_extents = + create_image_extents(gpu_sizes_, storage_type(), memory_layout_); + // Update the texture limits to reflect the new virtual extents. + texture_limits_.limits = api::utils::ivec3{ + api::utils::safe_downcast(virtual_extents.data[0]), + api::utils::safe_downcast(virtual_extents.data[1]), + api::utils::safe_downcast(virtual_extents.data[2])}; + } + + if (sizes_uniform_.buffer()) { + sizes_uniform_.update(api::utils::make_whcn_ivec4(sizes_)); + } + if (texture_limits_uniform_.buffer()) { + texture_limits_uniform_.update(texture_limits_); + } } void vTensor::reallocate(const std::vector& new_sizes) { @@ -236,6 +279,8 @@ void vTensor::reallocate(const std::vector& new_sizes) { } void vTensor::virtual_resize(const std::vector& new_sizes) { + // For texture storage check that the current texture is large enough for the + // new sizes of the tensor. if (storage_type() != api::kBuffer) { api::utils::uvec3 virtual_extents = create_image_extents(gpu_sizes_, storage_type(), memory_layout_); diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h index 8ba99ed1827..53dbfecffe6 100644 --- a/backends/vulkan/runtime/api/Tensor.h +++ b/backends/vulkan/runtime/api/Tensor.h @@ -94,6 +94,13 @@ class vTensorStorage final { }; class vTensor final { + struct TextureLimits { + // Alignment is required to conform with Vulkan specification; a 3 or 4 + // component vector with components of size N must have base alignment of + // 4N. + alignas(16) api::utils::ivec3 limits; + }; + public: explicit vTensor( api::Context* context, @@ -115,11 +122,18 @@ class vTensor final { std::vector sizes_; std::vector gpu_sizes_; + TextureLimits texture_limits_; - // A Vulkan uniform buffer containing the tensor sizes in WHCN that can be - // passed into a shader. + // A Vulkan uniform buffer containing the (W, H, C, N) tensor sizes that can + // be passed into a shader. api::UniformParamsBuffer sizes_uniform_; + // A Vulkan uniform buffer containing the texture limits derived from the + // tensor's current size information that can be passed into a shader. Note + // that the texture limits may be different from the texture's extents if the + // tensor has been resized with `virtual_resize()`. + api::UniformParamsBuffer texture_limits_uniform_; + vTensorStorage storage_; public: @@ -194,11 +208,17 @@ class vTensor final { /* * Get the binding information for the uniform buffer object containing the - * tensor sizes to use in a compute shader. + * tensor sizes to use in a compute shader. Note that the GPU buffer will be + * allocated the first time this function is called. */ - inline const api::BufferBindInfo sizes_ubo() { - return api::BufferBindInfo(sizes_uniform_.buffer()); - } + const api::BufferBindInfo sizes_ubo(); + + /* + * Get the binding information for the uniform buffer object containing the + * texture limits to use in a compute shader. Note that the GPU buffer will be + * allocated the first time this function is called. + */ + const api::BufferBindInfo texture_limits_ubo(); inline size_t numel() const { return api::utils::multiply_integers(sizes()); diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl index 578c195ea9d..33f5ff9dd3e 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d.glsl @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in; layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in; -layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 5) uniform PRECISION restrict InSizes { @@ -44,8 +44,6 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - /* * Computes a 2D convolution. Each shader invocation calculates the output at * a single output location. @@ -53,7 +51,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl index fa6dee4760f..56d70a2bfe0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw.glsl @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in; layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in; -layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 5) uniform PRECISION restrict InSizes { @@ -44,8 +44,6 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - /* * Computes a depthwise convolution. Each shader invocation calculates the * output at a single output location. @@ -53,7 +51,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl index 207eab0a9c6..cf4cfe66ac2 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_dw_output_tile.glsl @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in; layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in; -layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 5) uniform PRECISION restrict InSizes { @@ -44,8 +44,6 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - /* * Computes a depthwise convolution. Each shader invocation calculates the * output at a single output location. @@ -53,7 +51,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl index bb780ad2886..453a03dea54 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv2d_pw.glsl @@ -21,8 +21,8 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in; layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in; -layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 5) uniform PRECISION restrict InSizes { @@ -44,8 +44,6 @@ layout(set = 0, binding = 7) uniform PRECISION restrict ExtraParams { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - /* * Computes a 2D pointwise convolution of an NxN output tile. Calculating an * output tile for pointwise convolution is more efficient because the kernel @@ -71,7 +69,7 @@ void main() { // If the top left position is out of bounds, then this invocation will have // no work to do. - if (pos_out_of_bounds(pos[0], out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos[0], out_limits))) { return; } @@ -146,7 +144,7 @@ void main() { } for (int i = 0; i < ${TILE_SIZE * TILE_SIZE}; ++i) { - if (!pos_out_of_bounds(pos[i], out_sizes, packed_dim)) { + if (all(lessThan(pos[i], out_limits))) { imageStore(image_out, pos[i], sum[i]); } } diff --git a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d.glsl b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d.glsl index 4a141ddded9..3f2f6241a1d 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/conv_transpose2d.glsl @@ -21,11 +21,11 @@ layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 2) uniform PRECISION sampler2D kernel_in; layout(set = 0, binding = 3) uniform PRECISION sampler2D bias_in; -layout(set = 0, binding = 4) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 4) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; -layout(set = 0, binding = 5) uniform PRECISION restrict InExtents { +layout(set = 0, binding = 5) uniform PRECISION restrict InSizes { ivec4 in_sizes; }; @@ -54,7 +54,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/matmul.glsl b/backends/vulkan/runtime/graph/ops/glsl/matmul.glsl index 9cd0c63ac88..a911c4fb6e4 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/matmul.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/matmul.glsl @@ -16,8 +16,8 @@ layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict layout(set = 0, binding = 1) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} im_mat1; layout(set = 0, binding = 2) uniform PRECISION ${SAMPLER_T[NDIM][DTYPE]} im_mat2; -layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 3) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 4) uniform PRECISION restrict InSizes { @@ -26,12 +26,10 @@ layout(set = 0, binding = 4) uniform PRECISION restrict InSizes { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int out_packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, out_packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl b/backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl index ccac87b3864..25749afbf85 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/max_pool2d.glsl @@ -19,8 +19,8 @@ layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict layout(set = 0, binding = 1, ${IMAGE_FORMAT["int"]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM]["int"]} image_idx; layout(set = 0, binding = 2) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 3) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 4) uniform PRECISION restrict InSizes { @@ -36,12 +36,10 @@ layout(set = 0, binding = 5) uniform PRECISION restrict Params { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl b/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl index 32bf2df0e93..235408c0a81 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/native_layer_norm.glsl @@ -25,22 +25,24 @@ layout(set = 0, binding = 3) uniform PRECISION sampler3D image_in; layout(set = 0, binding = 4) uniform PRECISION sampler3D weight_in; layout(set = 0, binding = 5) uniform PRECISION sampler3D bias_in; -layout(set = 0, binding = 6) uniform PRECISION restrict Sizes { +layout(set = 0, binding = 6) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 7) uniform PRECISION restrict Sizes { ivec4 sizes; }; -layout(set = 0, binding = 7) uniform PRECISION restrict Epsilon { +layout(set = 0, binding = 8) uniform PRECISION restrict Epsilon { float epsilon; }; layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/permute.glsl b/backends/vulkan/runtime/graph/ops/glsl/permute.glsl index 4ba2c7f4c60..ff5ab63a4f7 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/permute.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/permute.glsl @@ -19,31 +19,27 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; -/* - * Params Buffer - */ -layout(set = 0, binding = 3) uniform PRECISION restrict Block { +layout(set = 0, binding = 4) uniform PRECISION restrict Block { // output dims uvec4 out_ndims; // x = output channels aligned to 4, y = input channels aligned to 4 uvec2 ch_info; }; -/* - * Local Work Group - */ layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl index f6135d138c2..f94e1120492 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_batch_4d.glsl @@ -17,11 +17,15 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict Sizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; -layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { +layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { // data.x: index along batch dim to select // data.y: number of batches // data.z: number of texels per batch @@ -31,8 +35,6 @@ layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const int num_batches = select_info.y; const int num_texel_per_batch = select_info.z; @@ -40,7 +42,7 @@ void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl index b86b15e8614..0bbec798484 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_channel_3d.glsl @@ -20,23 +20,25 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; // index to select -layout(set = 0, binding = 3) uniform PRECISION restrict IndexVal { +layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { int index; }; layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl index b3ff196682e..517362f76ea 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_channel_4d.glsl @@ -18,11 +18,15 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; -layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { +layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { // data.x: index along channel dim to select // data.y: number of batches // data.z: number of texels per batch @@ -32,12 +36,10 @@ layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl index b71efd7d50b..87409fb35fd 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_height_3d.glsl @@ -18,23 +18,25 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; // index to select -layout(set = 0, binding = 3) uniform PRECISION restrict IndexVal { +layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { int index; }; layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl index e78b692ecb3..2e4e2afb2db 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_height_4d.glsl @@ -18,12 +18,16 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; // index to select -layout(set = 0, binding = 3) uniform PRECISION restrict IndexVal { +layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { // data.x: index along height dim to select // data.y: number of batches // data.z: number of texels per batch @@ -33,12 +37,10 @@ layout(set = 0, binding = 3) uniform PRECISION restrict IndexVal { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl index 56d71f58d02..1e12d15ab21 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_width_3d.glsl @@ -19,23 +19,25 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; // index to select -layout(set = 0, binding = 3) uniform PRECISION restrict IndexVal { +layout(set = 0, binding = 4) uniform PRECISION restrict IndexVal { int index; }; layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl index 3e09e329b31..ffbd8afbda0 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/select_width_4d.glsl @@ -19,12 +19,16 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; +}; + +layout(set = 0, binding = 3) uniform PRECISION restrict Sizes { ivec4 sizes; }; // index to select -layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { +layout(set = 0, binding = 4) uniform PRECISION restrict SelectVal { // data.x: index along width dim to select // data.y: number of batches // data.z: number of texels per batch @@ -34,12 +38,10 @@ layout(set = 0, binding = 3) uniform PRECISION restrict SelectVal { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl b/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl index cfe264b5491..607f77d8254 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/slice_channel.glsl @@ -39,7 +39,6 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 out_pos = ivec3(gl_GlobalInvocationID); - const ivec4 idx = to_tensor_idx(out_pos, out_sizes, packed_dim); if (any(greaterThanEqual(idx, out_sizes))) { diff --git a/backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl b/backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl index 3e7cb25be5a..03cd94fb3d7 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl @@ -18,8 +18,8 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; // dim to sum @@ -54,7 +54,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl b/backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl index b7ebd353b57..64d37a13e8f 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl @@ -17,8 +17,8 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; // dim to sum @@ -48,7 +48,7 @@ layout(constant_id = 3) const int packed_dim = C_DIM; void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/glsl/unary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/unary_op.glsl index fda2a08188a..85e2c5c1a5e 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/unary_op.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/unary_op.glsl @@ -21,8 +21,8 @@ layout(std430) buffer; layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in; -layout(set = 0, binding = 2) uniform PRECISION restrict OutSizes { - ivec4 out_sizes; +layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits { + ivec3 out_limits; }; layout(set = 0, binding = 3) uniform PRECISION restrict Min { @@ -35,12 +35,10 @@ layout(set = 0, binding = 4) uniform PRECISION restrict Max { layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; -layout(constant_id = 3) const int packed_dim = C_DIM; - void main() { const ivec3 pos = ivec3(gl_GlobalInvocationID); - if (pos_out_of_bounds(pos, out_sizes, packed_dim)) { + if (any(greaterThanEqual(pos, out_limits))) { return; } diff --git a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp index 2ad1880667c..20d7c9256bb 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Convolution.cpp @@ -372,13 +372,13 @@ void add_conv2d_node( {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}}, // Shader params buffers { - t_out->sizes_ubo(), + t_out->texture_limits_ubo(), t_in->sizes_ubo(), graph.create_params_buffer(kernel_params), graph.create_params_buffer(extra_params), }, // Specialization Constants - {t_out->gpu_memory_layout_int()}, + {}, // Resizing Logic resize_conv2d_node, {weight, stride, padding, dilation, transposed, output_padding})); diff --git a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp index 4ac6e148274..053ef0ff350 100644 --- a/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/MatMul.cpp @@ -93,9 +93,9 @@ void add_matmul_node( {{out, api::MemoryAccessType::WRITE}, {{arg1, arg2}, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out->sizes_ubo(), t_mat1->sizes_ubo()}, + {t_out->texture_limits_ubo(), t_mat1->sizes_ubo()}, // Specialization Constants - {t_out->gpu_memory_layout_int()}, + {}, // Resizing Logic resize_matmul_node)); } diff --git a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp index 1f34b0344e8..0c579274448 100644 --- a/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/NativeLayerNorm.cpp @@ -109,9 +109,11 @@ void add_native_layer_norm_node( api::MemoryAccessType::WRITE}, {{arg_in, arg_weight, arg_bias}, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out->sizes_ubo(), graph.create_params_buffer(epsilon)}, + {t_out->texture_limits_ubo(), + t_out->sizes_ubo(), + graph.create_params_buffer(epsilon)}, // Specialization Constants - {SV(t_out->gpu_memory_layout_int())}, + {}, // Resizing Logic resize_native_layer_norm_node, {normalized_shape})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp index 09e5cc906e9..3bc6ca52c60 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp @@ -88,9 +88,11 @@ void add_permute_node( global_size, local_size, {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}}, - {t_out->sizes_ubo(), graph.create_params_buffer(params)}, + {t_out->texture_limits_ubo(), + t_out->sizes_ubo(), + graph.create_params_buffer(params)}, // Specialization Constants - {SV(t_out->gpu_memory_layout_int())}, + {}, // Resizing Logic nullptr, {})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp index 58557788138..1a8a258627e 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Pool.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Pool.cpp @@ -94,12 +94,12 @@ void add_max_pool2d_node( {arg, api::MemoryAccessType::READ}}, // Shader params buffers { - t_out->sizes_ubo(), + t_out->texture_limits_ubo(), t_in->sizes_ubo(), graph.create_params_buffer(kernel_params), }, // Specialization Constants - {t_out->gpu_memory_layout_int()}, + {}, // Resizing Logic resize_max_pool2d_node, {kernel_size, stride, padding, dilation, ceil_mode})); diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp index 073eae77ce4..1d85984ef18 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp @@ -114,14 +114,15 @@ void add_select_int_node( // Inputs and Outputs {{out, api::MemoryAccessType::WRITE}, {in, api::MemoryAccessType::READ}}, // Parameter buffers - {t_out->sizes_ubo(), + {t_out->texture_limits_ubo(), + t_out->sizes_ubo(), // TODO: num_batches and num_texel_per_batch are provided by // t_out->sizes. Can change the following to reduce params // created. graph.create_params_buffer(api::utils::make_ivec4( {index, num_batches, num_texel_per_batch, 0}))}, // Specialization Constants - {SV(t_out->gpu_memory_layout_int())})); + {})); } void select_int(ComputeGraph& graph, const std::vector& args) { diff --git a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp index 652340d1dc6..cf7f891cdcb 100644 --- a/backends/vulkan/runtime/graph/ops/impl/Sum.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/Sum.cpp @@ -87,12 +87,12 @@ void add_sum_dim_node( // Inputs and Outputs {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out->sizes_ubo(), + {t_out->texture_limits_ubo(), graph.create_params_buffer(dim + 4 - in_dim), graph.create_params_buffer(dim_size), graph.create_params_buffer(int(ceil(channel / 4.0)))}, // Specialization Constants - {t_out->gpu_memory_layout_int()}, + {}, // Resizing Logic resize_sum_node, {out, in, static_cast(dim), keepdim})); diff --git a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp index 0d28f52e1c2..3888118b90d 100644 --- a/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/UnaryOp.cpp @@ -55,11 +55,11 @@ void add_unary_op_node( // Inputs and Outputs {{out, api::MemoryAccessType::WRITE}, {arg, api::MemoryAccessType::READ}}, // Shader params buffers - {t_out->sizes_ubo(), + {t_out->texture_limits_ubo(), graph.create_params_buffer(min), graph.create_params_buffer(max)}, // Specialization Constants - {t_out->gpu_memory_layout_int()}, + {}, // Resizing Logic resize_unary_op_node)); } diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index aecc27d966f..4955d0537ee 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -287,8 +287,8 @@ TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) { vTensor b = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); vTensor c = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); - // Allocations will be made for uniform buffers containing tensor metadata - EXPECT_TRUE(get_vma_allocation_count() == 3); + // No allocations made so far + EXPECT_TRUE(get_vma_allocation_count() == 0); std::vector data_a(a.gpu_numel()); std::fill(data_a.begin(), data_a.end(), 2.5f); @@ -303,8 +303,8 @@ TEST_F(VulkanComputeAPITest, texture_deferred_allocation_test) { api::MemoryAllocation c_mem = allocate_memory_for(c); c.image().bind_allocation(c_mem); - // One additional allocation for each tensor - EXPECT_TRUE(get_vma_allocation_count() == 6); + // One allocation for each tensor + EXPECT_TRUE(get_vma_allocation_count() == 3); fill_vtensor(a, data_a); fill_vtensor(b, data_b); @@ -332,8 +332,8 @@ TEST_F(VulkanComputeAPITest, texture_resource_aliasing_test) { vTensor d = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); vTensor e = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); - // Allocations will be made for uniform buffers containing tensor metadata - EXPECT_TRUE(get_vma_allocation_count() == 5); + // No allocations made so far + EXPECT_TRUE(get_vma_allocation_count() == 0); // a and d can share the same memory allocation api::MemoryAllocation a_d_mem = allocate_memory_for(a); @@ -347,8 +347,8 @@ TEST_F(VulkanComputeAPITest, texture_resource_aliasing_test) { api::MemoryAllocation c_mem = allocate_memory_for(c); c.image().bind_allocation(c_mem); - // 3 additional allocations should be made - EXPECT_TRUE(get_vma_allocation_count() == 8); + // 3 allocations should be made + EXPECT_TRUE(get_vma_allocation_count() == 3); // Specify input data std::vector data_a(a.gpu_numel()); @@ -407,12 +407,12 @@ TEST_F(VulkanComputeAPITest, resource_destructor_non_owning_memory) { vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); memory = allocate_memory_for(a); - EXPECT_TRUE(get_vma_allocation_count() == 2); + EXPECT_TRUE(get_vma_allocation_count() == 1); a.image().bind_allocation(memory); } // Check that the memory is still allocated - EXPECT_TRUE(get_vma_allocation_count() == 2); + EXPECT_TRUE(get_vma_allocation_count() == 1); } TEST_F(VulkanComputeAPITest, use_non_bound_textures_fails) { @@ -421,8 +421,8 @@ TEST_F(VulkanComputeAPITest, use_non_bound_textures_fails) { std::vector sizes = {4, 4, 1}; vTensor a = CREATE_FLOAT_TEXTURE(sizes, /*allocate_memory = */ false); - // Allocation for uniform containing tensor metadata - EXPECT_TRUE(get_vma_allocation_count() == 1); + // No allocations yet + EXPECT_TRUE(get_vma_allocation_count() == 0); std::vector data_a(a.gpu_numel()); std::fill(data_a.begin(), data_a.end(), 2.5f);