diff --git a/onnxruntime/core/providers/cpu/ml/svmclassifier.cc b/onnxruntime/core/providers/cpu/ml/svmclassifier.cc index 65bcad2be8a24..1fcf896d21227 100644 --- a/onnxruntime/core/providers/cpu/ml/svmclassifier.cc +++ b/onnxruntime/core/providers/cpu/ml/svmclassifier.cc @@ -47,32 +47,33 @@ SVMClassifier::SVMClassifier(const OpKernelInfo& info) class_count_ = 0; for (size_t i = 0; i < vectors_per_class_.size(); i++) { starting_vector_.push_back(vector_count_); - vector_count_ += narrow(vectors_per_class_[i]); + vector_count_ += onnxruntime::narrow(vectors_per_class_[i]); } + ORT_ENFORCE(classlabels_strings_.size() > 0 || classlabels_ints_.size() > 0, "One of classlabels_strings, classlabels_ints is required."); + using_strings_ = false; if (classlabels_strings_.size() > 0) { using_strings_ = true; class_count_ = classlabels_strings_.size(); - } else if (classlabels_ints_.size() > 0) { - class_count_ = classlabels_ints_.size(); } else { - class_count_ = 1; + class_count_ = classlabels_ints_.size(); } + ORT_ENFORCE(class_count_ < 65536, "The number of classes ", class_count_, " is beyond what this kernel supports (65535)."); + ORT_ENFORCE(proba_.size() == probb_.size(), "proba and probb must have the same size."); + ORT_ENFORCE(coefficients_.size() > 0, "coefficients are empty."); + if (vector_count_ > 0) { feature_count_ = support_vectors_.size() / vector_count_; // length of each support vector mode_ = SVM_TYPE::SVM_SVC; + ORT_ENFORCE(vectors_per_class_.size() == class_count_, "Mismatch between classlabels_ints/classlabels_strings and vectors_per_class dimensions."); } else { feature_count_ = coefficients_.size() / class_count_; // liblinear mode mode_ = SVM_TYPE::SVM_LINEAR; set_kernel_type(KERNEL::LINEAR); } - ORT_ENFORCE(classlabels_strings_.size() > 0 || classlabels_ints_.size() > 0); - ORT_ENFORCE(proba_.size() == probb_.size()); - ORT_ENFORCE(coefficients_.size() > 0); - // Validate attribute array sizes against the declared dimensions to prevent // out-of-bounds reads from crafted models. if (mode_ == SVM_TYPE::SVM_SVC) { @@ -121,7 +122,7 @@ SVMClassifier::SVMClassifier(const OpKernelInfo& info) } template -static void ChooseClass(Tensor& output, const int64_t output_idx, float max_weight, const int64_t maxclass, +static void ChooseClass(Tensor& output, const int64_t output_idx, float max_weight, const size_t maxclass, bool have_proba, bool weights_are_all_positive, const std::vector& classlabels, const LabelType& posclass, const LabelType& negclass) { @@ -134,9 +135,9 @@ static void ChooseClass(Tensor& output, const int64_t output_idx, float max_weig else if (max_weight > 0 && !weights_are_all_positive) output_data = classlabels[1]; else - output_data = classlabels[onnxruntime::narrow(maxclass)]; + output_data = classlabels[maxclass]; } else { - output_data = classlabels[onnxruntime::narrow(maxclass)]; + output_data = classlabels[maxclass]; } } else if (max_weight > 0) { output_data = posclass; @@ -209,7 +210,7 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, const ptrdiff_t num_batches = SafeInt(input_rank == 1 ? 1 : x_shape[0]); const ptrdiff_t num_features = input_rank == 1 ? narrow(x_shape[0]) : narrow(x_shape[1]); - ORT_RETURN_IF_NOT(num_features == feature_count_ && num_features >= 0 && num_batches >= 0, + ORT_RETURN_IF_NOT(num_features == static_cast(feature_count_) && num_features >= 0 && num_batches >= 0, "Invalid input for SVMClassifier: expected feature_count=", feature_count_, ", actual num_features=", num_features, ", input_rank=", input_rank, @@ -241,11 +242,11 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, // Total number of classifiers comparing pairs between the classes // e.g. if you have A, B C and D classes, the number of classifiers to compare between each pair is 6 // with AB, AC, AD, BC, BD and CD - const int64_t num_classifiers = class_count_ * (class_count_ - 1) / 2; // == (class_count_-1)! - const int64_t class_count_squared = class_count_ * class_count_; + const size_t num_classifiers = class_count_ * (class_count_ - 1) / 2; // == (class_count_-1)! + const size_t class_count_squared = class_count_ * class_count_; const bool have_proba = proba_.size() > 0; - int64_t final_scores_per_batch = class_count_; + size_t final_scores_per_batch = class_count_; if (mode_ == SVM_TYPE::SVM_SVC && !have_proba) { if (class_count_ > 2) final_scores_per_batch = num_classifiers; @@ -261,7 +262,7 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, // both outputs are required so can't be nullptr Tensor& Y = *ctx.Output(0, {num_batches}); - Tensor& Z = *ctx.Output(1, {num_batches, final_scores_per_batch}); + Tensor& Z = *ctx.Output(1, {num_batches, static_cast(final_scores_per_batch)}); auto final_scores = Z.MutableDataAsSpan(); @@ -276,7 +277,7 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, } int write_additional_scores = -1; - int64_t num_scores_per_batch = class_count_; + size_t num_scores_per_batch = class_count_; if (mode_ == SVM_TYPE::SVM_SVC && !have_proba) { num_scores_per_batch = num_classifiers; @@ -346,39 +347,39 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, // e.g. AB combines with BA. // If A has 3 support vectors and B has 2, there's a 3x2 block for AB and a 2x3 block for BA to combine - auto cur_kernels = kernels_span.subspan(n * SafeInt(vector_count_), onnxruntime::narrow(vector_count_)); - auto cur_scores = classifier_scores.subspan(n * SafeInt(num_slots_per_iteration), onnxruntime::narrow(num_classifiers)); - auto cur_votes = votes_span.subspan(n * SafeInt(class_count_), onnxruntime::narrow(class_count_)); + auto cur_kernels = kernels_span.subspan(n * SafeInt(vector_count_), vector_count_); + auto cur_scores = classifier_scores.subspan(n * SafeInt(num_slots_per_iteration), num_classifiers); + auto cur_votes = votes_span.subspan(n * SafeInt(class_count_), class_count_); auto scores_iter = cur_scores.begin(); size_t classifier_idx = 0; - for (int64_t i = 0; i < class_count_ - 1; i++) { - int64_t start_index_i = starting_vector_[onnxruntime::narrow(i)]; // start of support vectors for class i - int64_t class_i_support_count = vectors_per_class_[onnxruntime::narrow(i)]; - int64_t i_coeff_row_offset = vector_count_ * i; + for (size_t i = 0; i < class_count_ - 1; i++) { + size_t start_index_i = starting_vector_[i]; // start of support vectors for class i + size_t class_i_support_count = onnxruntime::narrow(vectors_per_class_[i]); + size_t i_coeff_row_offset = vector_count_ * i; - for (int64_t j = i + 1; j < class_count_; j++) { - int64_t start_index_j = starting_vector_[onnxruntime::narrow(j)]; // start of support vectors for class j - int64_t class_j_support_count = vectors_per_class_[onnxruntime::narrow(j)]; - int64_t j_coeff_row_offset = vector_count_ * (j - 1); + for (size_t j = i + 1; j < class_count_; j++) { + size_t start_index_j = starting_vector_[j]; // start of support vectors for class j + size_t class_j_support_count = onnxruntime::narrow(vectors_per_class_[j]); + size_t j_coeff_row_offset = vector_count_ * (j - 1); double sum = 0; - const float* val1 = &(coefficients_[j_coeff_row_offset + SafeInt(start_index_i)]); - const float* val2 = &(cur_kernels[onnxruntime::narrow(start_index_i)]); - for (int64_t m = 0; m < class_i_support_count; ++m, ++val1, ++val2) + const float* val1 = coefficients_.data() + (j_coeff_row_offset + start_index_i); + const float* val2 = cur_kernels.data() + start_index_i; + for (size_t m = 0; m < class_i_support_count; ++m, ++val1, ++val2) sum += *val1 * *val2; - val1 = &(coefficients_[i_coeff_row_offset + SafeInt(start_index_j)]); - val2 = &(cur_kernels[onnxruntime::narrow(start_index_j)]); + val1 = coefficients_.data() + (i_coeff_row_offset + start_index_j); + val2 = cur_kernels.data() + start_index_j; - for (int64_t m = 0; m < class_j_support_count; ++m, ++val1, ++val2) + for (size_t m = 0; m < class_j_support_count; ++m, ++val1, ++val2) sum += *val1 * *val2; sum += rho_[classifier_idx++]; *scores_iter++ = static_cast(sum); - ++(cur_votes[onnxruntime::narrow(sum > 0 ? i : j)]); + ++(cur_votes[sum > 0 ? i : j]); } } } @@ -389,23 +390,23 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, &classifier_scores_data, num_classifiers, &votes_data, &Y, num_scores_per_batch, write_additional_scores](ptrdiff_t idx) { int n = SafeInt(idx); // convert to a usable sized type - auto cur_scores = final_scores.subspan(n * SafeInt(final_scores_per_batch), onnxruntime::narrow(final_scores_per_batch)); + auto cur_scores = final_scores.subspan(n * SafeInt(final_scores_per_batch), final_scores_per_batch); if (mode_ == SVM_TYPE::SVM_SVC && have_proba) { - auto probsp2 = gsl::make_span(probsp2_data.data() + (n * class_count_squared), onnxruntime::narrow(class_count_squared)); + auto probsp2 = gsl::make_span(probsp2_data.data() + (n * class_count_squared), class_count_squared); float* classifier_scores = classifier_scores_data.data() + (n * num_classifiers); size_t index = 0; - for (int64_t i = 0; i < class_count_ - 1; ++i) { - int64_t p1 = i * class_count_ + i + 1; - int64_t p2 = (i + 1) * class_count_ + i; - for (int64_t j = i + 1; j < class_count_; ++j, ++index) { + for (size_t i = 0; i < class_count_ - 1; ++i) { + size_t p1 = i * class_count_ + i + 1; + size_t p2 = (i + 1) * class_count_ + i; + for (size_t j = i + 1; j < class_count_; ++j, ++index) { float val1 = sigmoid_probability(classifier_scores[index], proba_[index], probb_[index]); float val2 = std::max(val1, 1.0e-7f); val2 = std::min(val2, 1 - 1.0e-7f); - probsp2[onnxruntime::narrow(p1)] = val2; - probsp2[onnxruntime::narrow(p2)] = 1 - val2; + probsp2[p1] = val2; + probsp2[p2] = 1 - val2; ++p1; p2 += class_count_; } @@ -431,10 +432,10 @@ Status SVMClassifier::ComputeImpl(OpKernelContext& ctx, // onnx specs expects one column per class. if (num_classifiers == 1) { // binary case if (using_strings_) { - ChooseClass(Y, n, max_weight, maxclass, have_proba, weights_are_all_positive_, + ChooseClass(Y, n, max_weight, onnxruntime::narrow(maxclass), have_proba, weights_are_all_positive_, classlabels_strings_, "1", "0"); } else { - ChooseClass(Y, n, max_weight, maxclass, have_proba, weights_are_all_positive_, + ChooseClass(Y, n, max_weight, onnxruntime::narrow(maxclass), have_proba, weights_are_all_positive_, classlabels_ints_, 1, 0); } } else { // multiclass diff --git a/onnxruntime/core/providers/cpu/ml/svmclassifier.h b/onnxruntime/core/providers/cpu/ml/svmclassifier.h index e392d0915db68..4d7ed089089f2 100644 --- a/onnxruntime/core/providers/cpu/ml/svmclassifier.h +++ b/onnxruntime/core/providers/cpu/ml/svmclassifier.h @@ -121,12 +121,12 @@ class SVMClassifier final : public OpKernel, private SVMCommon { Status ComputeImpl(OpKernelContext& ctx, gsl::span x_data, const TensorShape& x_shape) const; bool weights_are_all_positive_; - ptrdiff_t feature_count_; - ptrdiff_t class_count_; - ptrdiff_t vector_count_; + size_t feature_count_; + size_t class_count_; + size_t vector_count_; bool using_strings_; std::vector vectors_per_class_; - std::vector starting_vector_; + std::vector starting_vector_; std::vector rho_; std::vector proba_; std::vector probb_; diff --git a/onnxruntime/test/providers/cpu/ml/svmclassifier_test.cc b/onnxruntime/test/providers/cpu/ml/svmclassifier_test.cc index 3c5b71b90b4b8..640c3a513e85d 100644 --- a/onnxruntime/test/providers/cpu/ml/svmclassifier_test.cc +++ b/onnxruntime/test/providers/cpu/ml/svmclassifier_test.cc @@ -438,5 +438,47 @@ TEST(MLOpTest, SVMClassifierDifferentSizeKernelParameters) { test.Run(OpTester::ExpectResult::kExpectFailure, "kernel_params must be empty or have 3 values"); } +TEST(MLOpTest, SVMClassifierSVCLinearUndersizedVectorPerClass) { + OpTester test("SVMClassifier", 1, onnxruntime::kMLDomain); + + std::vector coefficients = {0.766398549079895f, 0.0871576070785522f, 0.110420741140842f, + -0.963976919651031f}; + std::vector support_vectors = {4.80000019073486f, 3.40000009536743f, 1.89999997615814f, + 5.f, 3.f, 1.60000002384186f, + 4.5f, 2.29999995231628f, 1.29999995231628f, + 5.09999990463257f, 2.5f, 3.f}; + std::vector rho = {2.23510527610779f}; + std::vector kernel_params = {0.122462183237076f, 0.f, 3.f}; // gamma, coef0, degree + std::vector classes = {0, 1}; + std::vector vectors_per_class = {3}; // undersized: 2 classes but only 1 entry + + std::vector X = {5.1f, 3.5f, 1.4f, + 4.9f, 3.f, 1.4f, + 4.7f, 3.2f, 1.3f, + 4.6f, 3.1f, 1.5f, + 5.f, 3.6f, 1.4f}; + std::vector scores_predictions = {-1.5556798f, 1.5556798f, + -1.2610321f, 1.2610321f, + -1.5795376f, 1.5795376f, + -1.3083477f, 1.3083477f, + -1.6572928f, 1.6572928f}; + + std::vector class_predictions = {0, 0, 0, 0, 0}; + + test.AddAttribute("kernel_type", std::string("LINEAR")); + test.AddAttribute("coefficients", coefficients); + test.AddAttribute("support_vectors", support_vectors); + test.AddAttribute("vectors_per_class", vectors_per_class); + test.AddAttribute("rho", rho); + test.AddAttribute("kernel_params", kernel_params); + test.AddAttribute("classlabels_ints", classes); + + test.AddInput("X", {5, 3}, X); + test.AddOutput("Y", {5}, class_predictions); + test.AddOutput("Z", {5, 2}, scores_predictions); + + test.Run(OpTester::ExpectResult::kExpectFailure, "Mismatch between classlabels_ints/classlabels_strings and vectors_per_class dimensions."); +} + } // namespace test } // namespace onnxruntime