-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Fix integer overflow in GridSample GsReflect causing heap OOB read #27975
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| // Licensed under the MIT License. | ||
|
|
||
| #include <algorithm> | ||
| #include <cmath> | ||
| #include <vector> | ||
|
|
||
| #include "core/common/safeint.h" | ||
|
|
@@ -63,9 +65,16 @@ T GsReflect(T x, T x_min, T x_max) { | |
| T dx = {}; | ||
| T fx = static_cast<T>(x); | ||
| T range = x_max - x_min; | ||
| // Guard against NaN, Inf, or non-positive range (e.g. dim==1 with align_corners=true) | ||
| // which would otherwise produce wild indices via division by zero or UB float->int casts. | ||
| if (!std::isfinite(fx) || !(range > T{0})) { | ||
| return x_min; | ||
| } | ||
| if (fx < x_min) { | ||
| dx = x_min - fx; | ||
| int n = static_cast<int>(dx / range); | ||
| // Use int64_t rather than int: for extreme (but finite) inputs, dx / range can exceed | ||
| // INT_MAX, making a float->int cast undefined behavior. | ||
| int64_t n = static_cast<int64_t>(dx / range); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When align_corners == true and a spatial dimension is 1: x_max = 0, x_min = 0, so range = 0. GsReflect computes dx / range = division by zero. For floating point this produces +Inf, and the subsequent static_cast<int64_t>(Inf) is UB. The std::clamp in PixelAtGrid catches this, but the way clamps are used is not /wo problems (see other comments). #Resolved |
||
| T r = dx - n * range; | ||
| if (n % 2 == 0) { | ||
| fx = x_min + r; | ||
|
|
@@ -74,7 +83,7 @@ T GsReflect(T x, T x_min, T x_max) { | |
| } | ||
| } else if (fx > x_max) { | ||
| dx = fx - x_max; | ||
| int n = static_cast<int>(dx / range); | ||
| int64_t n = static_cast<int64_t>(dx / range); | ||
| T r = dx - n * range; | ||
| if (n % 2 == 0) { | ||
| fx = x_max - r; | ||
|
|
@@ -86,6 +95,15 @@ T GsReflect(T x, T x_min, T x_max) { | |
| return static_cast<T>(fx); | ||
| } | ||
|
|
||
| // Returns true when v is finite and its magnitude is small enough that converting it to | ||
| // int64_t via std::floor / std::nearbyint is well-defined. 2^62 is far below INT64_MAX | ||
| // (~9.22e18) and leaves ample margin for any realistic image dimension. | ||
| template <typename T> | ||
| inline bool IsSafeForInt64Conversion(T v) { | ||
| constexpr T kSafeBound = static_cast<T>(int64_t{1} << 62); | ||
| return std::isfinite(v) && v >= -kSafeBound && v <= kSafeBound; | ||
| } | ||
|
|
||
| // Calculate cubic convolution interpolation coefficients | ||
| // ROBERT G. KEYS https://ieeexplore.ieee.org/document/1163711 | ||
| template <typename T> | ||
|
|
@@ -124,6 +142,11 @@ T GridSample<T>::PixelAtGrid(const T* image, int64_t r, int64_t c, int64_t H, in | |
| } else { // (padding_mode_ == Reflection) | ||
| c = static_cast<int64_t>(GsReflect(static_cast<T>(c), border[0], border[2])); | ||
| r = static_cast<int64_t>(GsReflect(static_cast<T>(r), border[1], border[3])); | ||
| // Safety clamp: GsReflect is computed in floating point and casts back to int64_t. | ||
| // Extreme grid coordinates can overflow that cast, so clamp the resulting indices | ||
| // back into the image range before indexing. | ||
| c = std::clamp<int64_t>(c, 0, W - 1); | ||
| r = std::clamp<int64_t>(r, 0, H - 1); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. std::clamp requires lo <= hi. When W == 0, the call becomes std::clamp(c, 0, -1) which violates this precondition — the behavior is undefined per the C++ standard. While the Compute function returns early if Y.Shape().Size() == 0, this check examines N * C * H_out * W_out. If H_in == 0 or W_in == 0 but H_out > 0 and W_out > 0 (possible since grid determines output spatial dims), the code will proceed to call PixelAtGrid with H == 0 or W == 0, hitting this UB. This applies to both PixelAtGrid and PixelAtGrid3D, in the Border branch (pre-existing) and the new Reflection branch. Also at other locations for clamp including lines 124-125. |
||
| pixel = image[r * W + c]; | ||
|
GopalakrishnanN marked this conversation as resolved.
|
||
| } | ||
| return pixel; | ||
|
|
@@ -145,6 +168,12 @@ T GridSample<T>::PixelAtGrid3D(const T* image, int64_t d, int64_t h, int64_t w, | |
| w = static_cast<int64_t>(GsReflect(static_cast<T>(w), border[0], border[3])); | ||
| h = static_cast<int64_t>(GsReflect(static_cast<T>(h), border[1], border[4])); | ||
| d = static_cast<int64_t>(GsReflect(static_cast<T>(d), border[2], border[5])); | ||
| // Safety clamp: GsReflect is computed in floating point and casts back to int64_t. | ||
| // Extreme grid coordinates can overflow that cast, so clamp the resulting indices | ||
| // back into the image range before indexing. | ||
| w = std::clamp<int64_t>(w, 0, W - 1); | ||
| h = std::clamp<int64_t>(h, 0, H - 1); | ||
| d = std::clamp<int64_t>(d, 0, D - 1); | ||
| pixel = image[d * H * W + h * W + w]; | ||
| } | ||
| return pixel; | ||
|
|
@@ -186,8 +215,19 @@ void PrecomputeBilinearSamplePlan2D(const T* grid_data, | |
| auto& plan = plans[idx]; | ||
| const T nx = grid_data[idx * 2]; | ||
| const T ny = grid_data[idx * 2 + 1]; | ||
| const T x = GsDenormalize<T>(nx, W_in, false); | ||
| const T y = GsDenormalize<T>(ny, H_in, false); | ||
| T x = GsDenormalize<T>(nx, W_in, false); | ||
| T y = GsDenormalize<T>(ny, H_in, false); | ||
|
|
||
| // Sanitize coordinates that are non-finite or whose magnitude is too large | ||
| // for a safe float->int64 conversion via std::floor. The fast path is only used | ||
| // for zeros padding without align_corners, so substituting the lower border (-0.5) | ||
| // produces an out-of-bounds floor index that the mask logic correctly rejects. | ||
| if (!IsSafeForInt64Conversion(x)) { | ||
| x = static_cast<T>(-0.5f); | ||
| } | ||
| if (!IsSafeForInt64Conversion(y)) { | ||
| y = static_cast<T>(-0.5f); | ||
| } | ||
|
|
||
| const int64_t x1 = static_cast<int64_t>(std::floor(x)); | ||
| const int64_t y1 = static_cast<int64_t>(std::floor(y)); | ||
|
|
@@ -398,6 +438,17 @@ Status GridSample<T>::Compute(OpKernelContext* context) const { | |
| auto x = GsDenormalize<T>(nx, W_in, align_corners_); // actual location | ||
| auto y = GsDenormalize<T>(ny, H_in, align_corners_); | ||
|
|
||
| // Sanitize coordinates that are non-finite or whose magnitude is too large | ||
| // for a safe float->int64 conversion via std::floor / std::nearbyint. | ||
| // Substituting the in-range border value keeps the subsequent casts | ||
| // well-defined while still producing a defined output for each padding mode. | ||
| if (!IsSafeForInt64Conversion(x)) { | ||
| x = x_min; | ||
| } | ||
| if (!IsSafeForInt64Conversion(y)) { | ||
| y = y_min; | ||
| } | ||
|
|
||
| if (mode_ == Nearest) { | ||
| x = static_cast<T>(std::nearbyint(static_cast<T>(x))); | ||
| y = static_cast<T>(std::nearbyint(static_cast<T>(y))); | ||
|
|
@@ -496,6 +547,20 @@ Status GridSample<T>::Compute(OpKernelContext* context) const { | |
| auto y = GsDenormalize<T>(ny, H_in, align_corners_); | ||
| auto z = GsDenormalize<T>(nz, D_in, align_corners_); | ||
|
|
||
| // Sanitize coordinates that are non-finite or whose magnitude is too large | ||
| // for a safe float->int64 conversion via std::floor / std::nearbyint. | ||
| // Substituting the in-range border value keeps the subsequent casts | ||
| // well-defined while still producing a defined output for each padding mode. | ||
| if (!IsSafeForInt64Conversion(x)) { | ||
| x = x_min; | ||
| } | ||
| if (!IsSafeForInt64Conversion(y)) { | ||
| y = y_min; | ||
| } | ||
| if (!IsSafeForInt64Conversion(z)) { | ||
| z = z_min; | ||
| } | ||
|
|
||
| if (mode_ == Nearest) { | ||
| x = static_cast<T>(std::nearbyint(static_cast<T>(x))); | ||
| y = static_cast<T>(std::nearbyint(static_cast<T>(y))); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here and in all other places with static_cast<>()
static_cast<int64_t>(dx / range) is still UB for extreme inputs
For float, values up to ~3.4e38 are representable. dx / range can produce a value > 9.2e18 (INT64_MAX). The C++ standard says static_cast<int64_t> on a floating-point value outside the representable range is undefined behavior (not just implementation-defined).
For double (which is also a registered type — line 34), the range extends to ~1.8e308, making this trivially exploitable.
The std::clamp in PixelAtGrid catches the fallout in practice, but the UB in GsReflect itself means the compiler is free to do anything. A defensive fix would use std::fmod for the reflection calculation entirely in floating point, avoiding any float-to-integer cast on large values.
#Resolved