@@ -7831,7 +7831,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
 
 // ggml_pool_*
 
-static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
+static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
     return (ins + 2 * p - ks) / s + 1;
 }
 
@@ -7878,16 +7878,15 @@ struct ggml_tensor * ggml_pool_2d(
         int                   k1,
         int                   s0,
         int                   s1,
-        int                   p0,
-        int                   p1) {
+        float                 p0,
+        float                 p1) {
 
     bool is_node = false;
 
     if (a->grad) {
         GGML_ASSERT(false); // TODO: implement backward
         is_node = true;
     }
-
     const int64_t ne[3] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
@@ -15007,14 +15006,11 @@ static void ggml_compute_forward_pool_1d(
     ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
 }
 
-// ggml_compute_forward_pool_2d_sk_p0
+// ggml_compute_forward_pool_2d
 
-static void ggml_compute_forward_pool_2d_sk_p0(
+static void ggml_compute_forward_pool_2d(
         const struct ggml_compute_params * params,
-        const enum   ggml_op_pool op,
         const struct ggml_tensor * src,
-        const int k0,
-        const int k1,
         struct ggml_tensor * dst) {
     assert(src->type == GGML_TYPE_F32);
     assert(params->ith == 0);
@@ -15023,6 +15019,14 @@ static void ggml_compute_forward_pool_2d_sk_p0(
         return;
     }
 
+    const int32_t * opts = (const int32_t *)dst->op_params;
+    enum ggml_op_pool op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
     const char * cdata = (const char*)src->data;
     const char * const data_end = cdata + ggml_nbytes(src);
 
@@ -15033,6 +15037,8 @@ static void ggml_compute_forward_pool_2d_sk_p0(
     float * dplane = (float *)dst->data;
 
     const int ka = k0 * k1;
+    const int offset0 = -p0;
+    const int offset1 = -p1;
 
     while (cdata < data_end) {
         for (int oy = 0; oy < py; ++oy) {
@@ -15045,13 +15051,15 @@ static void ggml_compute_forward_pool_2d_sk_p0(
                     case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
                 }
 
-                const int ix = ox * k0;
-                const int iy = oy * k1;
+                const int ix = offset0 + ox * s0;
+                const int iy = offset1 + oy * s1;
 
                 for (int ky = 0; ky < k1; ++ky) {
+                    if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
                     const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
                     for (int kx = 0; kx < k0; ++kx) {
                         int j = ix + kx;
+                        if (j < 0 || j >= src->ne[0]) continue;
                         switch (op) {
                             case GGML_OP_POOL_AVG:                     *out += srow[j]; break;
                             case GGML_OP_POOL_MAX: if (srow[j] > *out) *out  = srow[j]; break;
@@ -15072,29 +15080,6 @@ static void ggml_compute_forward_pool_2d_sk_p0(
     }
 }
 
-// ggml_compute_forward_pool_2d
-
-static void ggml_compute_forward_pool_2d(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-              struct ggml_tensor * dst) {
-
-    const int32_t * opts = (const int32_t *)dst->op_params;
-    enum ggml_op_pool op = opts[0];
-    const int k0 = opts[1];
-    const int k1 = opts[2];
-    const int s0 = opts[3];
-    const int s1 = opts[4];
-    const int p0 = opts[5];
-    const int p1 = opts[6];
-    GGML_ASSERT(p0 == 0);
-    GGML_ASSERT(p1 == 0); // padding not supported
-    GGML_ASSERT(k0 == s0);
-    GGML_ASSERT(k1 == s1); // only s = k supported
-
-    ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
-}
-
 // ggml_compute_forward_upscale
 
 static void ggml_compute_forward_upscale_f32(
0 commit comments