Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions onnxruntime/core/common/safeint.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,50 @@
#if defined(__GNUC__)
#pragma GCC diagnostic pop
#endif

#include <type_traits>

namespace onnxruntime {

// Strips any reference, and then any top-level const/volatile qualifier, from T.
template <typename T>
using remove_cvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True when T, ignoring reference and cv-qualification, is an integral type other than bool.
template <typename T>
inline constexpr bool is_supported_integer_v =
    std::conjunction_v<std::is_integral<remove_cvref_t<T>>,
                       std::negation<std::is_same<remove_cvref_t<T>, bool>>>;

//------------------------------------------------------------------------------
// Safe multiplication of two or more integer values into an explicit result type R.
//
// The first operand is converted to R (via SafeCast unless it already has type R);
// every remaining operand is then multiplied into the running product with
// SafeMultiply, left to right.
//
// Template parameters:
//   R          - result type; must be spelled out by the caller,
//                e.g. SafeMul<size_t>(a, b, c).
//   T, U, Rest - operand types, deduced from the arguments.
// All of them must be integral types other than bool (enforced by static_assert).
//
// Returns the product a * b * rest... as an R.
// Throws OnnxRuntimeException (through ORT_THROW) when SafeCast or any
// SafeMultiply step reports failure.
//------------------------------------------------------------------------------
template <typename R, typename T, typename U, typename... Rest>
[[nodiscard]] R SafeMul(T a, U b, Rest... rest) {
  static_assert(is_supported_integer_v<R>,
                "SafeMul requires an integral result type (excluding bool)");
  static_assert(is_supported_integer_v<T> && is_supported_integer_v<U>,
                "SafeMul requires integral operand types (excluding bool)");
  static_assert((is_supported_integer_v<Rest> && ...),
                "SafeMul requires integral operand types (excluding bool)");

  // SafeMultiply(T, U, T&) requires the first argument and the result to share
  // the same type, so the first operand is brought into R up front and the
  // product is accumulated directly in R.
  R result{};
  if constexpr (std::is_same_v<R, T>) {
    result = a;
  } else {
    if (!SafeCast(a, result)) {
      ORT_THROW("SafeMul: integer multiplication overflow");
    }
  }

  if (!SafeMultiply(result, b, result)) {
    ORT_THROW("SafeMul: integer multiplication overflow");
  }

  // Both outcomes sit in mutually exclusive `if constexpr` branches. A trailing
  // `return result;` after a branch that already returns is unreachable in every
  // instantiation that has extra operands, which MSVC reports as "unreachable
  // code" and which breaks builds that treat warnings as errors.
  if constexpr (sizeof...(rest) > 0) {
    // Fold the remaining operands into the partial product, which is already an R.
    return SafeMul<R>(result, rest...);
  } else {
    return result;
  }
}

} // namespace onnxruntime
57 changes: 37 additions & 20 deletions onnxruntime/core/providers/cpu/rnn/rnn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "core/providers/cpu/rnn/rnn.h"

#include "core/common/narrow.h"
#include "core/common/safeint.h"
#include "core/framework/op_kernel_context_internal.h"
#include "core/providers/cpu/rnn/rnn_activation_functors.h"
Expand Down Expand Up @@ -84,15 +85,31 @@ void ApplyActivationToBatches(const Tensor* sequence_lens, const T* h_prev, T* Y
template <typename T>
void Assign_Y_h(const T* Y_buffer_data, Tensor* Y_h, const Tensor* sequence_lens,
int64_t num_directions, int direction, bool isReverse, int64_t batch_size, int64_t seq_length, int64_t hidden_size) {
if (seq_length == 0) {
// No sequence data was processed; zero out Y_h for this direction.
int64_t Y_h_direction_offset = direction * batch_size * hidden_size;
math::Set<T, CPUMathUtil>(SafeMul<size_t>(batch_size, hidden_size), T{0},
Comment thread
tianleiwu marked this conversation as resolved.
Outdated
Y_h->MutableData<T>() + Y_h_direction_offset, &CPUMathUtil::Instance());
return;
}

for (int batch = 0; batch < batch_size; batch++) {
int64_t last_time_step = isReverse ? 0 : seq_length - 1;
if (nullptr != sequence_lens && !isReverse)
if (nullptr != sequence_lens && !isReverse) {
last_time_step = sequence_lens->Data<int>()[batch] - 1;
if (last_time_step < 0) {
// sequence_lens[batch] == 0: no data was processed for this batch; zero out Y_h.
int64_t Y_h_offset = direction * batch_size * hidden_size + batch * hidden_size;
math::Set<T, CPUMathUtil>(narrow<size_t>(hidden_size), T{0},
Y_h->MutableData<T>() + Y_h_offset, &CPUMathUtil::Instance());
continue;
}
Comment thread
tianleiwu marked this conversation as resolved.
}
int64_t y_offset = last_time_step * num_directions * batch_size * hidden_size +
direction * batch_size * hidden_size +
batch * hidden_size;
int64_t Y_h_offset = direction * batch_size * hidden_size + batch * hidden_size;
math::CopyVector<T, CPUMathUtil>(static_cast<int>(hidden_size), Y_buffer_data + y_offset,
math::CopyVector<T, CPUMathUtil>(narrow<int>(hidden_size), Y_buffer_data + y_offset,
Y_h->MutableData<T>() + Y_h_offset,
&CPUMathUtil::Instance());
}
Expand All @@ -109,7 +126,7 @@ void ClearMissingFrames(T* Y_buffer_data, const Tensor* sequence_lens,
seq * num_directions * batch_size * hidden_size +
direction * batch_size * hidden_size +
batch * hidden_size;
math::Set<T, CPUMathUtil>(onnxruntime::narrow<size_t>(hidden_size), 0, Y_buffer_data + offset, &CPUMathUtil::Instance());
math::Set<T, CPUMathUtil>(narrow<size_t>(hidden_size), 0, Y_buffer_data + offset, &CPUMathUtil::Instance());
}
}
}
Expand Down Expand Up @@ -155,7 +172,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
ORT_RETURN_IF_ERROR(ctx->GetTempSpaceAllocator(&alloc));

// X * W^t, each direction has shape of [seq_length, batch_size, hidden_size]
auto x_matmul_data = alloc->Alloc(SafeInt<size_t>(sizeof(float)) * seq_length * batch_size * hidden_size_);
auto x_matmul_data = alloc->Alloc(SafeMul<size_t>(sizeof(float), seq_length, batch_size, hidden_size_));
BufferUniquePtr x_matmul_buffer(x_matmul_data, BufferDeleter(alloc));
auto* x_matmul_w_buffer_data = static_cast<float*>(x_matmul_buffer.get());

Expand All @@ -165,7 +182,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
if (Y != nullptr)
Y_buffer_data = Y->MutableData<float>();
else {
Y_data = alloc->Alloc(SafeInt<size_t>(sizeof(float)) * seq_length * num_directions * batch_size * hidden_size_);
Y_data = alloc->Alloc(SafeMul<size_t>(sizeof(float), seq_length, num_directions, batch_size, hidden_size_));
Y_matmul_buffer = BufferUniquePtr(Y_data, BufferDeleter(alloc));
Y_buffer_data = static_cast<float*>(Y_matmul_buffer.get());
}
Expand All @@ -177,20 +194,20 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
bool isReverse = direction_ == "reverse" || direction == 1;

if (B != nullptr) {
EigenMatrixMapRowMajor<float>(x_matmul_w_buffer_data, seq_length * SafeInt<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_)).rowwise() =
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_, onnxruntime::narrow<size_t>(hidden_size_)).transpose() +
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_ + hidden_size_, onnxruntime::narrow<size_t>(hidden_size_)).transpose();
EigenMatrixMapRowMajor<float>(x_matmul_w_buffer_data, SafeMul<size_t>(seq_length, batch_size), narrow<size_t>(hidden_size_)).rowwise() =
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_, narrow<size_t>(hidden_size_)).transpose() +
ConstEigenVectorMap<float>(B->Data<float>() + direction * 2 * hidden_size_ + hidden_size_, narrow<size_t>(hidden_size_)).transpose();
} else {
math::Set<float, CPUMathUtil>(seq_length * batch_size * SafeInt<size_t>(hidden_size_), 0, x_matmul_w_buffer_data, &CPUMathUtil::Instance());
math::Set<float, CPUMathUtil>(SafeMul<size_t>(seq_length, batch_size, hidden_size_), 0, x_matmul_w_buffer_data, &CPUMathUtil::Instance());
}

// X * W[direction]^t + B
math::Gemm<float>(
CblasNoTrans,
CblasTrans,
static_cast<int>(seq_length * batch_size),
static_cast<int>(hidden_size_),
static_cast<int>(input_size),
SafeMul<int>(seq_length, batch_size),
narrow<int>(hidden_size_),
narrow<int>(input_size),
1,
X.Data<float>(),
W.Data<float>() + direction * hidden_size_ * input_size,
Expand All @@ -202,7 +219,7 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
int64_t time_step = isReverse ? (seq_length - t - 1) : t;
int64_t Y_frame_offset = (time_step * num_directions + direction) * Y_frame_size;
float* Y_buffer_data_current_frame = Y_buffer_data + Y_frame_offset;
auto y_frame_mat = EigenMatrixMapRowMajor<float>(Y_buffer_data_current_frame, onnxruntime::narrow<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_));
auto y_frame_mat = EigenMatrixMapRowMajor<float>(Y_buffer_data_current_frame, narrow<size_t>(batch_size), narrow<size_t>(hidden_size_));

const float* h_prev = nullptr;
if (t == 0) {
Expand All @@ -224,21 +241,21 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
math::Gemm<float>(
CblasNoTrans,
CblasTrans,
static_cast<int>(batch_size),
static_cast<int>(hidden_size_),
static_cast<int>(hidden_size_),
narrow<int>(batch_size),
narrow<int>(hidden_size_),
narrow<int>(hidden_size_),
1,
h_prev,
R.Data<float>() + direction * hidden_size_ * hidden_size_,
0,
Y_buffer_data_current_frame,
tp, &mlas_backend_kernel_selector_config_);
} else {
math::Set<float, CPUMathUtil>(batch_size * SafeInt<size_t>(hidden_size_), 0, Y_buffer_data_current_frame, &CPUMathUtil::Instance());
math::Set<float, CPUMathUtil>(SafeMul<size_t>(batch_size, hidden_size_), 0, Y_buffer_data_current_frame, &CPUMathUtil::Instance());
}

// X[time_step] * W^t + H_t_1 * R^t
y_frame_mat += EigenMatrixMapRowMajor<float>(&x_matmul_w_buffer_data[time_step * Y_frame_size], onnxruntime::narrow<size_t>(batch_size), onnxruntime::narrow<size_t>(hidden_size_));
y_frame_mat += EigenMatrixMapRowMajor<float>(&x_matmul_w_buffer_data[time_step * Y_frame_size], narrow<size_t>(batch_size), narrow<size_t>(hidden_size_));

// apply activation
ApplyActivationToBatches<float>(sequence_lens, h_prev, Y_buffer_data_current_frame,
Expand All @@ -258,10 +275,10 @@ Status RNN<float>::Compute(OpKernelContext* ctx) const {
}

if (Y != nullptr)
DumpMatrix("Y", Y_buffer_data, (int)(seq_length * num_directions * batch_size), (int)hidden_size_);
DumpMatrix("Y", Y_buffer_data, SafeMul<int>(seq_length, num_directions, batch_size), narrow<int>(hidden_size_));

if (Y_h != nullptr)
DumpMatrix("Y_h", Y_h->Data<float>(), (int)(num_directions * batch_size), (int)hidden_size_);
DumpMatrix("Y_h", Y_h->Data<float>(), SafeMul<int>(num_directions, batch_size), narrow<int>(hidden_size_));

return Status::OK();
}
Expand Down
Loading