diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index 507c6722bc349..62a326e723021 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -742,8 +742,10 @@ Do not modify directly.*
|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
|||[9, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
|||[7, 8]|**T** = tensor(double), tensor(float), tensor(float16)|
-|GlobalAveragePool|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
-|GlobalMaxPool|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
+|GlobalAveragePool|*in* X:**T**<br> *out* Y:**T**|22+|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 21]|**T** = tensor(double), tensor(float), tensor(float16)|
+|GlobalMaxPool|*in* X:**T**<br> *out* Y:**T**|22+|**T** = tensor(double), tensor(float), tensor(float16)|
+|||[1, 21]|**T** = tensor(double), tensor(float), tensor(float16)|
 |Greater|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)<br/> **T1** = tensor(bool)|
|||[9, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
|||[7, 8]|**T** = tensor(double), tensor(float), tensor(float16)|
@@ -1066,8 +1068,10 @@ Do not modify directly.*
 |DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
|||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
|||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
-|GlobalAveragePool|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
-|GlobalMaxPool|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float), tensor(float16)|
+|GlobalAveragePool|*in* X:**T**<br> *out* Y:**T**|22+|**T** = tensor(float), tensor(float16)|
+|||[1, 21]|**T** = tensor(float), tensor(float16)|
+|GlobalMaxPool|*in* X:**T**<br> *out* Y:**T**|22+|**T** = tensor(float), tensor(float16)|
+|||[1, 21]|**T** = tensor(float), tensor(float16)|
 |GridSample|*in* X:**T1**<br> *in* grid:**T2**<br> *out* Y:**T1**|22+|**T1** = tensor(float)<br/> **T2** = tensor(float)|
 |||[20, 21]|**T1** = tensor(float)<br/> **T2** = tensor(float)|
 |||[16, 19]|**T1** = tensor(float)<br/> **T2** = tensor(float)|
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
index 60ac16018f539..cf5c92e79ca48 100755
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -760,18 +760,18 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kO
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, double, AveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, MLFloat16, AveragePool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalAveragePool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalAveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, float, GlobalAveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, double, GlobalAveragePool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalAveragePool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, double, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, MLFloat16, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, float, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, double, MaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, MLFloat16, MaxPool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalMaxPool);
-class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalMaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, float, GlobalMaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, double, GlobalMaxPool);
+class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalMaxPool);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMax);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMax);
class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMax);
@@ -1579,6 +1579,12 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, AveragePool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, AveragePool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, BFloat16, AveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, GlobalAveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, GlobalAveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalAveragePool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, GlobalMaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, GlobalMaxPool);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalMaxPool);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, Conv);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, Conv);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, Conv);
@@ -1845,18 +1851,18 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, float, AveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, double, AveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 7, 9, MLFloat16, AveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalAveragePool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, float, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, double, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalAveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, float, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, double, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 7, MLFloat16, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, float, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, double, MaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 8, 9, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, float, GlobalMaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, double, GlobalMaxPool)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, float, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, double, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalMaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, float, ArgMax)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, double, ArgMax)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 1, 11, MLFloat16, ArgMax)>,
@@ -2663,6 +2669,12 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, AveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, AveragePool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, BFloat16, AveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalAveragePool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, GlobalMaxPool)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalMaxPool)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, float, Conv)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, double, Conv)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 22, MLFloat16, Conv)>,
diff --git a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
index 8239a8ac252e6..5839d10b4345f 100755
--- a/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
+++ b/onnxruntime/core/providers/cuda/cuda_nhwc_kernels.cc
@@ -39,14 +39,18 @@ class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 10, float, ConvTranspose);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 10, MLFloat16, ConvTranspose);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(7, 9, float, AveragePool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(7, 9, MLFloat16, AveragePool);
-class CUDA_NHWC_OP_TYPED_CLASS_NAME(1, float, GlobalAveragePool);
-class CUDA_NHWC_OP_TYPED_CLASS_NAME(1, MLFloat16, GlobalAveragePool);
+class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, float, GlobalAveragePool);
+class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, MLFloat16, GlobalAveragePool);
+class CUDA_NHWC_OP_TYPED_CLASS_NAME(22, float, GlobalAveragePool);
+class CUDA_NHWC_OP_TYPED_CLASS_NAME(22, MLFloat16, GlobalAveragePool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 7, float, MaxPool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 7, MLFloat16, MaxPool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(8, 9, float, MaxPool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(8, 9, MLFloat16, MaxPool);
-class CUDA_NHWC_OP_TYPED_CLASS_NAME(1, float, GlobalMaxPool);
-class CUDA_NHWC_OP_TYPED_CLASS_NAME(1, MLFloat16, GlobalMaxPool);
+class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, float, GlobalMaxPool);
+class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, MLFloat16, GlobalMaxPool);
+class CUDA_NHWC_OP_TYPED_CLASS_NAME(22, float, GlobalMaxPool);
+class CUDA_NHWC_OP_TYPED_CLASS_NAME(22, MLFloat16, GlobalMaxPool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, float, AveragePool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, MLFloat16, AveragePool);
class CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, float, MaxPool);
@@ -118,14 +122,18 @@ Status RegisterCudaNhwcKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 10, MLFloat16, ConvTranspose)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(7, 9, float, AveragePool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(7, 9, MLFloat16, AveragePool)>,
-      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(1, float, GlobalAveragePool)>,
-      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(1, MLFloat16, GlobalAveragePool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, float, GlobalAveragePool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, MLFloat16, GlobalAveragePool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(22, float, GlobalAveragePool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(22, MLFloat16, GlobalAveragePool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 7, float, MaxPool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 7, MLFloat16, MaxPool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(8, 9, float, MaxPool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(8, 9, MLFloat16, MaxPool)>,
-      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(1, float, GlobalMaxPool)>,
-      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(1, MLFloat16, GlobalMaxPool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, float, GlobalMaxPool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(1, 21, MLFloat16, GlobalMaxPool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(22, float, GlobalMaxPool)>,
+      BuildKernelCreateInfo<CUDA_NHWC_OP_TYPED_CLASS_NAME(22, MLFloat16, GlobalMaxPool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, float, AveragePool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, MLFloat16, AveragePool)>,
       BuildKernelCreateInfo<CUDA_NHWC_OP_VERSIONED_TYPED_CLASS_NAME(10, 10, float, MaxPool)>,
diff --git a/onnxruntime/core/providers/cuda/nn/pool.cc b/onnxruntime/core/providers/cuda/nn/pool.cc
index f5fb851e5a061..3a97a5f2481e7 100644
--- a/onnxruntime/core/providers/cuda/nn/pool.cc
+++ b/onnxruntime/core/providers/cuda/nn/pool.cc
@@ -57,9 +57,13 @@ POOLING_KERNEL(AveragePool, float, AveragePool, 22, kOnnxDomain, false)
POOLING_KERNEL(AveragePool, double, AveragePool, 22, kOnnxDomain, false)
POOLING_KERNEL(AveragePool, MLFloat16, AveragePool, 22, kOnnxDomain, false)
POOLING_KERNEL(AveragePool, BFloat16, AveragePool, 22, kOnnxDomain, false)
-POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 1, kOnnxDomain, false)
-POOLING_KERNEL(GlobalAveragePool, double, AveragePool, 1, kOnnxDomain, false)
-POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 1, kOnnxDomain, false)
+// GlobalAveragePool opsets 1-22 share the same CUDA implementation for the currently supported types.
+POOLING_KERNEL_VERSIONED(GlobalAveragePool, float, AveragePool, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(GlobalAveragePool, double, AveragePool, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(GlobalAveragePool, MLFloat16, AveragePool, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 22, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, double, AveragePool, 22, kOnnxDomain, false)
+POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 22, kOnnxDomain, false)
POOLING_KERNEL_VERSIONED(MaxPool, float, MaxPool<1>, 1, 7, kOnnxDomain, false)
POOLING_KERNEL_VERSIONED(MaxPool, double, MaxPool<1>, 1, 7, kOnnxDomain, false)
POOLING_KERNEL_VERSIONED(MaxPool, MLFloat16, MaxPool<1>, 1, 7, kOnnxDomain, false)
@@ -78,9 +82,13 @@ POOLING_KERNEL_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 12, kOnnxDomain, fal
POOLING_KERNEL_WITH_INDICES(MaxPool, int8_t, MaxPool<8>, 12, kOnnxDomain, false)
POOLING_KERNEL_WITH_INDICES(MaxPool, uint8_t, MaxPool<8>, 12, kOnnxDomain, false)
-POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 1, kOnnxDomain, false)
-POOLING_KERNEL(GlobalMaxPool, double, MaxPool<1>, 1, kOnnxDomain, false)
-POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, kOnnxDomain, false)
+// GlobalMaxPool opsets 1-22 share the same CUDA implementation for the currently supported types.
+POOLING_KERNEL_VERSIONED(GlobalMaxPool, float, MaxPool<1>, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(GlobalMaxPool, double, MaxPool<1>, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL_VERSIONED(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, 21, kOnnxDomain, false)
+POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 22, kOnnxDomain, false)
+POOLING_KERNEL(GlobalMaxPool, double, MaxPool<1>, 22, kOnnxDomain, false)
+POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 22, kOnnxDomain, false)
// NHWC variants
#ifdef ENABLE_CUDA_NHWC_OPS
@@ -97,8 +105,10 @@ POOLING_KERNEL_WITH_INDICES(MaxPool, MLFloat16, MaxPool<8>, 12, kMSInternalNHWCD
POOLING_KERNEL_WITH_INDICES(MaxPool, int8_t, MaxPool<8>, 12, kMSInternalNHWCDomain, true)
POOLING_KERNEL_WITH_INDICES(MaxPool, uint8_t, MaxPool<8>, 12, kMSInternalNHWCDomain, true)
-POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 1, kMSInternalNHWCDomain, true)
-POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(GlobalMaxPool, float, MaxPool<1>, 1, 21, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(GlobalMaxPool, MLFloat16, MaxPool<1>, 1, 21, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalMaxPool, float, MaxPool<1>, 22, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalMaxPool, MLFloat16, MaxPool<1>, 22, kMSInternalNHWCDomain, true)
POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 7, 9, kMSInternalNHWCDomain, true)
POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 7, 9, kMSInternalNHWCDomain, true)
@@ -111,8 +121,10 @@ POOLING_KERNEL_VERSIONED(AveragePool, float, AveragePool, 19, 21, kMSInternalNHW
POOLING_KERNEL_VERSIONED(AveragePool, MLFloat16, AveragePool, 19, 21, kMSInternalNHWCDomain, true)
POOLING_KERNEL(AveragePool, float, AveragePool, 22, kMSInternalNHWCDomain, true)
POOLING_KERNEL(AveragePool, MLFloat16, AveragePool, 22, kMSInternalNHWCDomain, true)
-POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 1, kMSInternalNHWCDomain, true)
-POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 1, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(GlobalAveragePool, float, AveragePool, 1, 21, kMSInternalNHWCDomain, true)
+POOLING_KERNEL_VERSIONED(GlobalAveragePool, MLFloat16, AveragePool, 1, 21, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalAveragePool, float, AveragePool, 22, kMSInternalNHWCDomain, true)
+POOLING_KERNEL(GlobalAveragePool, MLFloat16, AveragePool, 22, kMSInternalNHWCDomain, true)
#endif
class CudnnPoolingDescriptor final {
diff --git a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
index c7a3526d9f030..66a18c22dff29 100644
--- a/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/pool_op_test.cc
@@ -1209,6 +1209,31 @@ TEST(PoolTest, GlobalAveragePool) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {});
}
+TEST(PoolTest, GlobalAveragePool_22_CUDA) {
+ auto cuda_ep = DefaultCudaExecutionProvider();
+ if (!cuda_ep) {
+ return;
+ }
+
+ OpTester test("GlobalAveragePool", 22);
+
+  std::vector<float> x_vals = {
+      1.0f, 2.0f, 3.0f, 4.0f,
+      5.0f, 6.0f, 7.0f, 8.0f,
+      9.0f, 10.0f, 11.0f, 12.0f,
+      13.0f, 14.0f, 15.0f, 16.0f};
+  std::vector<int64_t> x_dims = {1, 1, 4, 4};
+  std::vector<int64_t> expected_dims = {1, 1, 1, 1};
+  std::vector<float> expected_vals = {8.5f};
+
+  test.AddInput<float>("X", x_dims, x_vals);
+  test.AddOutput<float>("Y", expected_dims, expected_vals);
+
+  std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
+  execution_providers.push_back(std::move(cuda_ep));
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
+}
+
TEST(PoolTest, GlobalAveragePool_Large_128) {
OpTester test("GlobalAveragePool");