Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,8 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
endif()
list(REMOVE_ITEM all_tests
"${TEST_SRC_DIR}/providers/cpu/reduction/reduction_ops_test.cc"
"${TEST_SRC_DIR}/providers/cpu/tensor/grid_sample_test.cc")
"${TEST_SRC_DIR}/providers/cpu/tensor/grid_sample_test.cc"
"${TEST_SRC_DIR}/providers/cpu/tensor/grid_sample_test_custom.cc")
endif()

if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten" OR IOS)
Expand Down
73 changes: 69 additions & 4 deletions onnxruntime/core/providers/cpu/tensor/grid_sample.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <algorithm>
#include <cmath>
#include <vector>

#include "core/common/safeint.h"
Expand Down Expand Up @@ -63,9 +65,16 @@ T GsReflect(T x, T x_min, T x_max) {
T dx = {};
T fx = static_cast<T>(x);
T range = x_max - x_min;
// Guard against NaN, Inf, or non-positive range (e.g. dim==1 with align_corners=true)
// which would otherwise produce wild indices via division by zero or UB float->int casts.
if (!std::isfinite(fx) || !(range > T{0})) {
return x_min;
}
if (fx < x_min) {
dx = x_min - fx;
int n = static_cast<int>(dx / range);
// Use int64_t rather than int: for extreme (but finite) inputs, dx / range can exceed
// INT_MAX, making a float->int cast undefined behavior.
int64_t n = static_cast<int64_t>(dx / range);
Copy link
Copy Markdown
Member

@yuslepukhin yuslepukhin Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

static_cast<int64_t>(dx / range);

Here and in all other places with static_cast<>()

static_cast<int64_t>(dx / range) is still UB for extreme inputs

For float, values up to ~3.4e38 are representable. dx / range can produce a value > 9.2e18 (INT64_MAX). The C++ standard says static_cast<int64_t> on a floating-point value outside the representable range is undefined behavior (not just implementation-defined).

For double (which is also a registered type — line 34), the range extends to ~1.8e308, making this trivially exploitable.

The std::clamp in PixelAtGrid catches the fallout in practice, but the UB in GsReflect itself means the compiler is free to do anything. A defensive fix would use std::fmod for the reflection calculation entirely in floating point, avoiding any float-to-integer cast on large values.

#Resolved

Copy link
Copy Markdown
Member

@yuslepukhin yuslepukhin Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

int64_t n = static_cast<int64_t>(dx / range);

When align_corners == true and a spatial dimension is 1: x_max = 0, x_min = 0, so range = 0. GsReflect computes dx / range = division by zero. For floating point this produces +Inf, and the subsequent static_cast<int64_t>(Inf) is UB. The std::clamp in PixelAtGrid catches this, but the way clamps are used is not /wo problems (see other comments). #Resolved

T r = dx - n * range;
if (n % 2 == 0) {
fx = x_min + r;
Expand All @@ -74,7 +83,7 @@ T GsReflect(T x, T x_min, T x_max) {
}
} else if (fx > x_max) {
dx = fx - x_max;
int n = static_cast<int>(dx / range);
int64_t n = static_cast<int64_t>(dx / range);
T r = dx - n * range;
if (n % 2 == 0) {
fx = x_max - r;
Expand All @@ -86,6 +95,15 @@ T GsReflect(T x, T x_min, T x_max) {
return static_cast<T>(fx);
}

// Checks that v can be safely turned into an int64_t (via std::floor /
// std::nearbyint followed by a cast): v must be finite and its magnitude must
// not exceed 2^62, a bound that sits comfortably inside int64_t's range
// (INT64_MAX ~ 9.22e18) yet is far larger than any realistic image dimension.
template <typename T>
inline bool IsSafeForInt64Conversion(T v) {
  constexpr T kSafeBound = static_cast<T>(int64_t{1} << 62);
  if (!std::isfinite(v)) {
    return false;  // NaN or +/-Inf: any float->int cast would be UB
  }
  return !(v < -kSafeBound) && !(v > kSafeBound);
}

// Calculate cubic convolution interpolation coefficients
// ROBERT G. KEYS https://ieeexplore.ieee.org/document/1163711
template <typename T>
Expand Down Expand Up @@ -124,6 +142,11 @@ T GridSample<T>::PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, in
} else { // (padding_mode_ == Reflection)
c = static_cast<int64_t>(GsReflect(static_cast<T>(c), border[0], border[2]));
r = static_cast<int64_t>(GsReflect(static_cast<T>(r), border[1], border[3]));
// Safety clamp: GsReflect is computed in floating point and casts back to int64_t.
// Extreme grid coordinates can overflow that cast, so clamp the resulting indices
// back into the image range before indexing.
c = std::clamp<int64_t>(c, 0, W - 1);
r = std::clamp<int64_t>(r, 0, H - 1);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::clamp requires lo <= hi. When W == 0, the call becomes std::clamp(c, 0, -1) which violates this precondition — the behavior is undefined per the C++ standard.

While the Compute function returns early if Y.Shape().Size() == 0, this check examines N * C * H_out * W_out. If H_in == 0 or W_in == 0 but H_out > 0 and W_out > 0 (possible since grid determines output spatial dims), the code will proceed to call PixelAtGrid with H == 0 or W == 0, hitting this UB.

This applies to both PixelAtGrid and PixelAtGrid3D, in the Border branch (pre-existing) and the new Reflection branch.

Also at other locations for clamp including lines 124-125.

pixel = image[r * W + c];
Comment thread
GopalakrishnanN marked this conversation as resolved.
}
return pixel;
Expand All @@ -145,6 +168,12 @@ T GridSample<T>::PixelAtGrid3D(const T* image, int64_t d, int64_t h, int64_t w,
w = static_cast<int64_t>(GsReflect(static_cast<T>(w), border[0], border[3]));
h = static_cast<int64_t>(GsReflect(static_cast<T>(h), border[1], border[4]));
d = static_cast<int64_t>(GsReflect(static_cast<T>(d), border[2], border[5]));
// Safety clamp: GsReflect is computed in floating point and casts back to int64_t.
// Extreme grid coordinates can overflow that cast, so clamp the resulting indices
// back into the image range before indexing.
w = std::clamp<int64_t>(w, 0, W - 1);
h = std::clamp<int64_t>(h, 0, H - 1);
d = std::clamp<int64_t>(d, 0, D - 1);
pixel = image[d * H * W + h * W + w];
}
return pixel;
Expand Down Expand Up @@ -186,8 +215,19 @@ void PrecomputeBilinearSamplePlan2D(const T* grid_data,
auto& plan = plans[idx];
const T nx = grid_data[idx * 2];
const T ny = grid_data[idx * 2 + 1];
const T x = GsDenormalize<T>(nx, W_in, false);
const T y = GsDenormalize<T>(ny, H_in, false);
T x = GsDenormalize<T>(nx, W_in, false);
T y = GsDenormalize<T>(ny, H_in, false);

// Sanitize coordinates that are non-finite or whose magnitude is too large
// for a safe float->int64 conversion via std::floor. The fast path is only used
// for zeros padding without align_corners, so substituting the lower border (-0.5)
// produces an out-of-bounds floor index that the mask logic correctly rejects.
if (!IsSafeForInt64Conversion(x)) {
x = static_cast<T>(-0.5f);
}
if (!IsSafeForInt64Conversion(y)) {
y = static_cast<T>(-0.5f);
}

const int64_t x1 = static_cast<int64_t>(std::floor(x));
const int64_t y1 = static_cast<int64_t>(std::floor(y));
Expand Down Expand Up @@ -398,6 +438,17 @@ Status GridSample<T>::Compute(OpKernelContext* context) const {
auto x = GsDenormalize<T>(nx, W_in, align_corners_); // actual location
auto y = GsDenormalize<T>(ny, H_in, align_corners_);

// Sanitize coordinates that are non-finite or whose magnitude is too large
// for a safe float->int64 conversion via std::floor / std::nearbyint.
// Substituting the in-range border value keeps the subsequent casts
// well-defined while still producing a defined output for each padding mode.
if (!IsSafeForInt64Conversion(x)) {
x = x_min;
}
if (!IsSafeForInt64Conversion(y)) {
y = y_min;
}

if (mode_ == Nearest) {
x = static_cast<T>(std::nearbyint(static_cast<T>(x)));
y = static_cast<T>(std::nearbyint(static_cast<T>(y)));
Expand Down Expand Up @@ -496,6 +547,20 @@ Status GridSample<T>::Compute(OpKernelContext* context) const {
auto y = GsDenormalize<T>(ny, H_in, align_corners_);
auto z = GsDenormalize<T>(nz, D_in, align_corners_);

// Sanitize coordinates that are non-finite or whose magnitude is too large
// for a safe float->int64 conversion via std::floor / std::nearbyint.
// Substituting the in-range border value keeps the subsequent casts
// well-defined while still producing a defined output for each padding mode.
if (!IsSafeForInt64Conversion(x)) {
x = x_min;
}
if (!IsSafeForInt64Conversion(y)) {
y = y_min;
}
if (!IsSafeForInt64Conversion(z)) {
z = z_min;
}

if (mode_ == Nearest) {
x = static_cast<T>(std::nearbyint(static_cast<T>(x)));
y = static_cast<T>(std::nearbyint(static_cast<T>(y)));
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ void RunTests(T& test, std::vector<std::unique_ptr<IExecutionProvider>>&& execut
execution_providers.clear();
}

// Custom tests not generated by grid_sample_test_gen.py.
#include "test/providers/cpu/tensor/grid_sample_test_custom.inc"
// Custom tests not generated by grid_sample_test_gen.py live in
// grid_sample_test_custom.cc.

// DO NOT edit following tests. They are generated by:
// onnxruntime/test/providers/cpu/tensor/grid_sample_test_gen.py
Expand Down
Loading
Loading