diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 703e3783136..e6cac1cd5c9 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -2222,6 +2222,7 @@ extern "C" { enum ggml_scale_flag { GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8), GGML_SCALE_FLAG_ANTIALIAS = (1 << 9), + GGML_SCALE_FLAG_CUSTOM_SF = (1 << 10), // use explicit scale factors stored in op_params[1], op_params[2] }; // interpolate @@ -2255,6 +2256,22 @@ extern "C" { int64_t ne3, uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...] + // Like ggml_interpolate but with explicit scale factors sf0 and sf1 for the first two + // dimensions instead of deriving them from ne0/ne1 / src.ne0/src.ne1. + // Useful when the desired coordinate mapping differs from the simple ratio + // (e.g. PyTorch scale_factor=(H+0.1)/n_grid instead of H/n_grid). + // Sets GGML_SCALE_FLAG_CUSTOM_SF internally; sf0 corresponds to dim0, sf1 to dim1. + GGML_API struct ggml_tensor * ggml_interpolate_sf( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode, // ggml_scale_mode [ | ggml_scale_flag...] + float sf0, + float sf1); + // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0] GGML_API struct ggml_tensor * ggml_pad( struct ggml_context * ctx, diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index a9bc21da6f0..09be07b8922 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -7551,6 +7551,11 @@ static void ggml_compute_forward_upscale_f32( const int32_t mode_flags = ggml_get_op_params_i32(dst, 0); const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF); + if (mode_flags & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(dst, 1); + sf1 = ggml_get_op_params_f32(dst, 2); + } + if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { pixel_offset = 0.0f; sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0; @@ -7559,13 +7564,13 @@ static void ggml_compute_forward_upscale_f32( if (mode == GGML_SCALE_MODE_NEAREST) { for (int64_t i3 = 0; i3 < ne3; i3++) { - const int64_t i03 = i3 / sf3; + const int64_t i03 = MIN((int64_t)(i3 / sf3), ne03 - 1); for (int64_t i2 = ith; i2 < ne2; i2 += nth) { - const int64_t i02 = i2 / sf2; + const int64_t i02 = MIN((int64_t)(i2 / sf2), ne02 - 1); for (int64_t i1 = 0; i1 < ne1; i1++) { - const int64_t i01 = i1 / sf1; + const int64_t i01 = MIN((int64_t)(i1 / sf1), ne01 - 1); for (int64_t i0 = 0; i0 < ne0; i0++) { - const int64_t i00 = i0 / sf0; + const int64_t i00 = MIN((int64_t)(i0 / sf0), ne00 - 1); const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3); diff --git a/ggml/src/ggml-cuda/upscale.cu b/ggml/src/ggml-cuda/upscale.cu index 6bdf3cd996b..2270ce1ad91 100644 --- a/ggml/src/ggml-cuda/upscale.cu +++ b/ggml/src/ggml-cuda/upscale.cu @@ -2,6 +2,7 @@ static __global__ void upscale_f32(const float * x, float * dst, const int nb00, const int nb01, const int nb02, const int nb03, + const int ne00, const int ne01, const int ne02, const int ne03, const int ne10, const int ne11, const int ne12, const int ne13, const float sf0, const float sf1, const float sf2, const float sf3) { int index = threadIdx.x + blockIdx.x * blockDim.x; @@ -14,10 +15,10 @@ static __global__ void upscale_f32(const float * x, float * dst, int i12 = (index / (ne10 * ne11)) % ne12; int i13 = (index / (ne10 * ne11 * ne12)) % ne13; - int i00 = i10 / sf0; - int i01 = i11 / sf1; - int i02 = i12 / sf2; - int i03 = i13 / sf3; + int i00 = min((int)(i10 / sf0), ne00 - 1); + int i01 = min((int)(i11 / sf1), ne01 - 1); + int i02 = min((int)(i12 / sf2), ne02 - 1); + int i03 = min((int)(i13 / sf3), ne03 - 1); dst[index] = *( (const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00) ); } @@ -217,13 +218,14 @@ static __global__ void upscale_f32_bicubic(const float * x, float * dst, static void upscale_f32_cuda(const float * x, float * dst, const int nb00, const int nb01, const int nb02, const int nb03, + const int ne00, const int ne01, const int ne02, const int ne03, const int ne10, const int ne11, const int ne12, const int ne13, const float sf0, const float sf1, const float sf2, const float sf3, cudaStream_t stream) { const int64_t dst_size = ne10 * ne11 * ne12 * ne13; const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE; - upscale_f32<<>>(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); + upscale_f32<<>>(x, dst, nb00, nb01, nb02, nb03, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); } static void upscale_f32_bilinear_cuda(const float * x, float * dst, @@ -272,6 +274,10 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const float sf3 = (float)dst->ne[3]/src0->ne[3]; float pixel_offset = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(dst, 1); + sf1 = ggml_get_op_params_f32(dst, 2); + } if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { sf0 = dst->ne[0] > 1 && src0->ne[0] > 1 ? (float)(dst->ne[0] - 1) / (src0->ne[0] - 1) : sf0; sf1 = dst->ne[1] > 1 && src0->ne[1] > 1 ? (float)(dst->ne[1] - 1) / (src0->ne[1] - 1) : sf1; @@ -279,7 +285,7 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { } if (mode == GGML_SCALE_MODE_NEAREST) { - upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream); + upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream); } else if (mode == GGML_SCALE_MODE_BILINEAR) { const bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS); upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], diff --git a/ggml/src/ggml-metal/ggml-metal-ops.cpp b/ggml/src/ggml-metal/ggml-metal-ops.cpp index 5b426be103f..f4bd4ee2464 100644 --- a/ggml/src/ggml-metal/ggml-metal-ops.cpp +++ b/ggml/src/ggml-metal/ggml-metal-ops.cpp @@ -3901,6 +3901,11 @@ int ggml_metal_op_upscale(ggml_metal_op_t ctx, int idx) { float poffs = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(op, 1); + sf1 = ggml_get_op_params_f32(op, 2); + } + if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { poffs = 0.0f; sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0; diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 445a4deca83..7f135cfc942 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -4991,12 +4991,12 @@ kernel void kernel_upscale_nearest_f32( const int64_t i2 = tgpig.y; const int64_t i1 = tgpig.x; - const int64_t i03 = i3/args.sf3; - const int64_t i02 = i2/args.sf2; - const int64_t i01 = i1/args.sf1; + const int64_t i03 = MIN(i3/args.sf3, args.ne03 - 1); + const int64_t i02 = MIN(i2/args.sf2, args.ne02 - 1); + const int64_t i01 = MIN(i1/args.sf1, args.ne01 - 1); for (int i0 = tpitg.x; i0 < args.ne0; i0 += ntg.x) { - const int64_t i00 = i0/args.sf0; + const int64_t i00 = MIN(i0/args.sf0, args.ne00 - 1); device const float * src0_ptr = (device const float *) (src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); device float * dst_ptr = (device float *) (dst + i3*args.nb3 + i2*args.nb2 + i1*args.nb1 + i0*args.nb0); diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index a581402300a..8e6112e32e9 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -9061,6 +9061,11 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg float pixel_offset = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(dst, 1); + sf1 = ggml_get_op_params_f32(dst, 2); + } + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0)); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra_dst->data_device)); @@ -9071,14 +9076,18 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb03)); if (mode == GGML_SCALE_MODE_NEAREST) { - CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne0)); - CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne1)); - CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne2)); - CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne3)); - CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float), &sf0)); - CL_CHECK(clSetKernelArg(kernel, 13, sizeof(float), &sf1)); - CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf2)); - CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne02)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne03)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(int), &ne2)); + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &ne3)); + CL_CHECK(clSetKernelArg(kernel, 16, sizeof(float), &sf0)); + CL_CHECK(clSetKernelArg(kernel, 17, sizeof(float), &sf1)); + CL_CHECK(clSetKernelArg(kernel, 18, sizeof(float), &sf2)); + CL_CHECK(clSetKernelArg(kernel, 19, sizeof(float), &sf3)); } else if (mode == GGML_SCALE_MODE_BILINEAR) { if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0; diff --git a/ggml/src/ggml-opencl/kernels/upscale.cl b/ggml/src/ggml-opencl/kernels/upscale.cl index 25c68351bae..363ba4445ae 100644 --- a/ggml/src/ggml-opencl/kernels/upscale.cl +++ b/ggml/src/ggml-opencl/kernels/upscale.cl @@ -7,6 +7,10 @@ kernel void kernel_upscale( ulong nb01, ulong nb02, ulong nb03, + int ne00, + int ne01, + int ne02, + int ne03, int ne10, int ne11, int ne12, @@ -31,10 +35,10 @@ kernel void kernel_upscale( int i12 = (index / (ne10 * ne11)) % ne12; int i13 = index / (ne10 * ne11 * ne12); - int i00 = (int)(i10 / sf0); - int i01 = (int)(i11 / sf1); - int i02 = (int)(i12 / sf2); - int i03 = (int)(i13 / sf3); + int i00 = min((int)(i10 / sf0), ne00 - 1); + int i01 = min((int)(i11 / sf1), ne01 - 1); + int i02 = min((int)(i12 / sf2), ne02 - 1); + int i03 = min((int)(i13 / sf3), ne03 - 1); ulong offset_src_element = (ulong)i03 * nb03 + (ulong)i02 * nb02 + (ulong)i01 * nb01 + (ulong)i00 * nb00; global const float * src_element_ptr = (global const float *)(src_base + offset_src_element); diff --git a/ggml/src/ggml-sycl/upscale.cpp b/ggml/src/ggml-sycl/upscale.cpp index e42cb419d83..60a4d24079e 100644 --- a/ggml/src/ggml-sycl/upscale.cpp +++ b/ggml/src/ggml-sycl/upscale.cpp @@ -2,6 +2,7 @@ static void upscale_f32(const float * x, float * dst, const int nb00, const int nb01, const int nb02, const int nb03, + const int ne00, const int ne01, const int ne02, const int ne03, const int ne10, const int ne11, const int ne12, const int ne13, const float sf0, const float sf1, const float sf2, const float sf3) { auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); @@ -15,10 +16,10 @@ static void upscale_f32(const float * x, float * dst, int i12 = (index / (ne10 * ne11)) % ne12; int i13 = (index / (ne10 * ne11 * ne12)) % ne13; - int i00 = i10 / sf0; - int i01 = i11 / sf1; - int i02 = i12 / sf2; - int i03 = i13 / sf3; + int i00 = sycl::min((int)(i10 / sf0), ne00 - 1); + int i01 = sycl::min((int)(i11 / sf1), ne01 - 1); + int i02 = sycl::min((int)(i12 / sf2), ne02 - 1); + int i03 = sycl::min((int)(i13 / sf3), ne03 - 1); dst[index] = *((const float*)((const char*)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00)); @@ -256,6 +257,10 @@ static void upscale_f32_sycl(const float * x, const int nb01, const int nb02, const int nb03, + const int ne00, + const int ne01, + const int ne02, + const int ne03, const int ne10, const int ne11, const int ne12, @@ -273,7 +278,7 @@ static void upscale_f32_sycl(const float * x, sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<3> /*item_ct1*/) { - upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); + upscale_f32(x, dst, nb00, nb01, nb02, nb03, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); }); } @@ -376,6 +381,10 @@ void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { const float sf3 = (float)dst->ne[3]/src0->ne[3]; float pixel_offset = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(dst, 1); + sf1 = ggml_get_op_params_f32(dst, 2); + } if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { sf0 = dst->ne[0] > 1 && src0->ne[0] > 1 ? (float)(dst->ne[0] - 1) / (src0->ne[0] - 1) @@ -389,6 +398,7 @@ void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { if (mode == GGML_SCALE_MODE_NEAREST) { upscale_f32_sycl( src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], + src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream); } else if (mode == GGML_SCALE_MODE_BILINEAR) { const bool antialias = (mode_flags & GGML_SCALE_FLAG_ANTIALIAS); diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index b2a54bd85d0..4c3bcaf0c4e 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -10771,6 +10771,11 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c float sf3 = (float)ne3 / ne03; float pixel_offset = 0.5f; + if (mode & GGML_SCALE_FLAG_CUSTOM_SF) { + sf0 = ggml_get_op_params_f32(dst, 1); + sf1 = ggml_get_op_params_f32(dst, 2); + } + if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) { sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0; sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp index f7d12a8dda6..cfd52f25275 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp @@ -26,8 +26,8 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; layout (constant_id = 0) const uint scale_mode = 0; float fetch_nearest(uint i10, uint i11, uint i12, uint i13) { - const uint i00 = uint(i10 / p.sf0); - const uint i01 = uint(i11 / p.sf1); + const uint i00 = min(uint(i10 / p.sf0), p.ne00 - 1); + const uint i01 = min(uint(i11 / p.sf1), p.ne01 - 1); const uint i02 = uint(i12 / p.sf2); const uint i03 = uint(i13 / p.sf3); diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 0142498d967..23682b8e938 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -5014,6 +5014,25 @@ struct ggml_tensor * ggml_interpolate( return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); } +struct ggml_tensor * ggml_interpolate_sf( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode, + float sf0, + float sf1) { + GGML_ASSERT(sf0 > 0.0f && "ggml_interpolate_sf: sf0 must be positive"); + GGML_ASSERT(sf1 > 0.0f && "ggml_interpolate_sf: sf1 must be positive"); + struct ggml_tensor * result = ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, + mode | GGML_SCALE_FLAG_CUSTOM_SF); + ggml_set_op_params_f32(result, 1, sf0); + ggml_set_op_params_f32(result, 2, sf1); + return result; +} + // ggml_pad struct ggml_tensor * ggml_pad( diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 828a9c14a45..d82f3abe128 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -5778,6 +5778,40 @@ struct test_interpolate : public test_case { } }; +// GGML_OP_UPSCALE (via ggml_interpolate_sf) - custom scale factors +struct test_interpolate_sf : public test_case { + const ggml_type type; + const std::array ne; + const std::array ne_tgt; + const ggml_scale_mode mode; + const float sf0; + const float sf1; + + std::string vars() override { + return VARS_TO_STR6(type, ne, ne_tgt, mode, sf0, sf1); + } + + test_interpolate_sf(ggml_type type = GGML_TYPE_F32, + std::array ne = {2, 5, 7, 11}, + std::array ne_tgt = {5, 7, 11, 13}, + ggml_scale_mode mode = GGML_SCALE_MODE_BILINEAR, + float sf0 = 1.0f, + float sf1 = 1.0f) + : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode), sf0(sf0), sf1(sf1) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_name(a, "a"); + + ggml_tensor * out = ggml_interpolate_sf(ctx, a, + ne_tgt[0], ne_tgt[1], ne_tgt[2], ne_tgt[3], + mode, sf0, sf1); + ggml_set_name(out, "out"); + + return out; + } +}; + // GGML_OP_GROUP_NORM struct test_group_norm : public test_case { const ggml_type type; @@ -8443,6 +8477,27 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS))); } + // ggml_interpolate_sf - custom scale factors + for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC, ggml_scale_mode(GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS)}) { + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode, 5.0f/2.0f, 7.0f/5.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode, 2.0f/5.0f, 5.0f/7.0f)); + } + for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) { + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS), 5.0f/2.0f, 7.0f/5.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS), 2.0f/1.0f, 8.0f/4.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS), 1.0f/4.0f, 1.0f/1.0f)); + } + for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC, ggml_scale_mode(GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS)}) { + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {14, 14, 1152, 1}, {28, 28, 1152, 1}, mode, (28.0f+0.1f)/14.0f, (28.0f+0.1f)/14.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {14, 14, 768, 1}, {20, 16, 768, 1}, mode, (20.0f+0.1f)/14.0f, (16.0f+0.1f)/14.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {8, 8, 64, 1}, {8, 8, 64, 1}, mode, 1.0f, 1.0f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {28, 28, 768, 1}, {14, 14, 768, 1}, mode, (14.0f+0.1f)/28.0f, (14.0f+0.1f)/28.0f)); + } + for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC, ggml_scale_mode(GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ANTIALIAS)}) { + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {10, 10, 64, 1}, {20, 20, 64, 1}, mode, 1.5f, 1.5f)); + test_cases.emplace_back(new test_interpolate_sf(GGML_TYPE_F32, {10, 10, 64, 1}, {20, 16, 64, 1}, mode, 1.5f, 1.2f)); + } + test_cases.emplace_back(new test_sum()); test_cases.emplace_back(new test_sum(GGML_TYPE_F32, {11, 5, 6, 3}, {0, 2, 1, 3})); // row-contiguous but non-contiguous test_cases.emplace_back(new test_sum(GGML_TYPE_F32, {11, 5, 6, 3}, {0, 3, 2, 1}));