From 3c6d75b9d09145b22d6289d20e12ae382418e329 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 26 Jan 2026 21:24:45 +0000 Subject: [PATCH 1/8] add lpnorm 22 --- .../providers/cpu/cpu_execution_provider.cc | 18 ++++++++++++------ onnxruntime/core/providers/cpu/nn/lp_norm.cc | 12 ++++++++++-- .../onnx_backend_test_series_filters.jsonc | 3 --- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index db96089f7d053..c3e141e660071 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -194,8 +194,8 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, ConvTranspose); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 8, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, InstanceNormalization); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, float, LpNormalization); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, LpNormalization); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, float, LpNormalization); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, double, LpNormalization); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 12, LRN); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 9, AveragePool); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 7, MaxPool); @@ -1325,6 +1325,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class 
ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Softsign); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, ThresholdedRelu); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, AveragePool); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, LpNormalization); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, LpNormalization); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, Conv); @@ -1724,10 +1726,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -3380,6 +3382,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc b/onnxruntime/core/providers/cpu/nn/lp_norm.cc index 2286800c9638b..03f85e8ea5705 100644 --- a/onnxruntime/core/providers/cpu/nn/lp_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/lp_norm.cc @@ -7,14 +7,22 @@ #include "core/providers/common.h" namespace onnxruntime { +#define REGISTER_LPNORMALISATION_VERSIONED_KERNEL(type, sinceVersion, endVersion) \ + ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \ + LpNormalization, sinceVersion, endVersion, type, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + LpNorm); + #define REGISTER_LPNORMALISATION_KERNEL(type, sinceVersion) \ ONNX_CPU_OPERATOR_TYPED_KERNEL( \ LpNormalization, sinceVersion, type, \ KernelDefBuilder().TypeConstraint("T", 
DataTypeImpl::GetTensorType()), \ LpNorm); -REGISTER_LPNORMALISATION_KERNEL(float, 1) -REGISTER_LPNORMALISATION_KERNEL(double, 1) +REGISTER_LPNORMALISATION_VERSIONED_KERNEL(float, 1, 21) +REGISTER_LPNORMALISATION_VERSIONED_KERNEL(double, 1, 21) +REGISTER_LPNORMALISATION_KERNEL(float, 22) +REGISTER_LPNORMALISATION_KERNEL(double, 22) using InnerStride = Eigen::InnerStride; diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 31078fc9e1e22..31bccd96491cc 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -39,7 +39,6 @@ "^test_attention_4d_with_past_and_present_qk_matmul_bias_4d_mask_causal*", // location of infinities "^test_attention_4d_attn_mask_3d_causal_expanded*", // webgpu "^test_attention_4d_diff_heads_mask4d_padded_kv*", // Need nonpad_kv_seqlen - "^test_l2normalization*", // LpNormalization(22) not implemented // TODO: support the following tests in Attention-cuda "^test_attention_3d_gqa.*_cuda", // GQA not supported in Attention-cuda "^test_attention_4d_gqa.*_cuda", // GQA not supported in Attention-cuda @@ -60,8 +59,6 @@ "^test_attention_4d_with_qk_matmul_softmax_cuda", // QK matmul + softmax not supported in Attention-cuda "^test_attention_3d_with_past_and_present_qk_matmul_softmax_cuda", // QK matmul + softmax not supported in Attention-cuda "^test_attention_4d_with_past_and_present_qk_matmul_bias_cuda", // QK matmul + bias not supported in Attention-cuda - "^test_l1normalization*", // LpNormalization(22) not implemented - "^test_lpnormalization*", // LpNormalization(22) not implemented "^test_tensorscatter*", // TensorScatter(24) not implemented "^test_castlike_no_saturate_FLOAT_to_FLOAT8*", // ORT does not support ml_dtypes "^test_castlike_UINT4_to*", // ORT does not support ml_dtypes From d5d7f9e97c3c770d6d457a8e203583990ce8324e Mon Sep 17 00:00:00 2001 
From: Ti-Tai Wang Date: Tue, 27 Jan 2026 00:17:54 +0000 Subject: [PATCH 2/8] respect op spec in ONNX --- onnxruntime/core/providers/cpu/nn/lp_norm.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc b/onnxruntime/core/providers/cpu/nn/lp_norm.cc index 03f85e8ea5705..93bcd73f5916d 100644 --- a/onnxruntime/core/providers/cpu/nn/lp_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/lp_norm.cc @@ -45,12 +45,7 @@ void DoNormalizeP2( StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); auto norm = xVec.template lpNorm<2>(); - if (norm != 0) { - yVec = xVec / norm; - } else { - // norm is zero, so set the result to zero - yVec.setZero(); - } + yVec = xVec / norm; } }; @@ -67,12 +62,7 @@ void DoNormalizeP1( StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); auto norm = xVec.template lpNorm<1>(); - if (norm != 0) { - yVec = xVec / norm; - } else { - // norm is zero - set the result to zero - yVec.setZero(); - } + yVec = xVec / norm; } }; From 891ffa8909d6981902bbd22d635b6f4876a1b0de Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 00:42:41 +0000 Subject: [PATCH 3/8] update the tests --- onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc index b7cead66bd7fb..2a773c20d7c6f 100644 --- a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include + #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" using namespace std; @@ -136,12 +138,13 @@ void L1NormalizationWithZeroNorm() { test.AddAttribute("p", static_cast(1)); // With default axis (axis = -1), one of the norms will be evaluated to zero - // for the following input + // for the following input. Per ONNX spec, 0/0 = NaN. vector input = {2.f, 2.f, 0.f, 0.f}; vector input_dims = {2, 2}; test.AddInput("input", input_dims, input); - vector expected_output = {0.5f, 0.5f, 0.f, 0.f}; + T nan_val = std::numeric_limits::quiet_NaN(); + vector expected_output = {0.5f, 0.5f, nan_val, nan_val}; test.AddOutput("Y", input_dims, expected_output); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } @@ -156,12 +159,13 @@ void L2NormalizationWithZeroNorm() { OpTester test("LpNormalization"); // With default axis (axis = -1), one of the norms will be evaluated to zero - // for the following input + // for the following input. Per ONNX spec, 0/0 = NaN. 
vector input = {1.f, 0.f, 0.f, 0.f}; vector input_dims = {2, 2}; test.AddInput("input", input_dims, input); - vector expected_output = {1.f, 0.f, 0.f, 0.f}; + T nan_val = std::numeric_limits::quiet_NaN(); + vector expected_output = {1.f, 0.f, nan_val, nan_val}; test.AddOutput("Y", input_dims, expected_output); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } From a4d7b897517327612aacc28d52ee6733ee3e2826 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 21:12:54 +0000 Subject: [PATCH 4/8] revert implementation changes --- onnxruntime/core/providers/cpu/nn/lp_norm.cc | 26 ++++++++++--------- .../test/providers/cpu/nn/lp_norm_op_test.cc | 12 +++------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc b/onnxruntime/core/providers/cpu/nn/lp_norm.cc index 93bcd73f5916d..2286800c9638b 100644 --- a/onnxruntime/core/providers/cpu/nn/lp_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/lp_norm.cc @@ -7,22 +7,14 @@ #include "core/providers/common.h" namespace onnxruntime { -#define REGISTER_LPNORMALISATION_VERSIONED_KERNEL(type, sinceVersion, endVersion) \ - ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \ - LpNormalization, sinceVersion, endVersion, type, \ - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ - LpNorm); - #define REGISTER_LPNORMALISATION_KERNEL(type, sinceVersion) \ ONNX_CPU_OPERATOR_TYPED_KERNEL( \ LpNormalization, sinceVersion, type, \ KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ LpNorm); -REGISTER_LPNORMALISATION_VERSIONED_KERNEL(float, 1, 21) -REGISTER_LPNORMALISATION_VERSIONED_KERNEL(double, 1, 21) -REGISTER_LPNORMALISATION_KERNEL(float, 22) -REGISTER_LPNORMALISATION_KERNEL(double, 22) +REGISTER_LPNORMALISATION_KERNEL(float, 1) +REGISTER_LPNORMALISATION_KERNEL(double, 1) using InnerStride = Eigen::InnerStride; @@ -45,7 +37,12 @@ void DoNormalizeP2( StridedVec yVec(yData + base, 1,
onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); auto norm = xVec.template lpNorm<2>(); - yVec = xVec / norm; + if (norm != 0) { + yVec = xVec / norm; + } else { + // norm is zero, so set the result to zero + yVec.setZero(); + } } }; @@ -62,7 +59,12 @@ void DoNormalizeP1( StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); auto norm = xVec.template lpNorm<1>(); - yVec = xVec / norm; + if (norm != 0) { + yVec = xVec / norm; + } else { + // norm is zero - set the result to zero + yVec.setZero(); + } } }; diff --git a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc index 2a773c20d7c6f..b7cead66bd7fb 100644 --- a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc +++ b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc @@ -1,8 +1,6 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -#include - #include "gtest/gtest.h" #include "test/providers/provider_test_utils.h" using namespace std; @@ -138,13 +136,12 @@ void L1NormalizationWithZeroNorm() { test.AddAttribute("p", static_cast(1)); // With default axis (axis = -1), one of the norms will be evaluated to zero - // for the following input. Per ONNX spec, 0/0 = NaN. + // for the following input vector input = {2.f, 2.f, 0.f, 0.f}; vector input_dims = {2, 2}; test.AddInput("input", input_dims, input); - T nan_val = std::numeric_limits::quiet_NaN(); - vector expected_output = {0.5f, 0.5f, nan_val, nan_val}; + vector expected_output = {0.5f, 0.5f, 0.f, 0.f}; test.AddOutput("Y", input_dims, expected_output); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } @@ -159,13 +156,12 @@ void L2NormalizationWithZeroNorm() { OpTester test("LpNormalization"); // With default axis (axis = -1), one of the norms will be evaluated to zero - // for the following input. Per ONNX spec, 0/0 = NaN. 
+ // for the following input vector input = {1.f, 0.f, 0.f, 0.f}; vector input_dims = {2, 2}; test.AddInput("input", input_dims, input); - T nan_val = std::numeric_limits::quiet_NaN(); - vector expected_output = {1.f, 0.f, nan_val, nan_val}; + vector expected_output = {1.f, 0.f, 0.f, 0.f}; test.AddOutput("Y", input_dims, expected_output); test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } From 857c046690044b3f68fb74cc4943675d60ce7ce9 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 21:15:34 +0000 Subject: [PATCH 5/8] disable not supported l2norm test for now --- .../test/testdata/onnx_backend_test_series_filters.jsonc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 823ed63460cc9..cd57bc82aabf4 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -29,6 +29,8 @@ // Tests that are failing temporarily and should be fixed "current_failing_tests": [ + // TODO(titaiwang): onnx 1.21 should fix lpnorm zero norm issue + "^test_l2normalization*", // zero-norm mismatch: ORT emits 0, ONNX spec yields NaN "^test_adagrad", "^test_adagrad_multiple", "^test_attention_4d_fp16*", // precision issue: 1 / 192 mismatched elements From 3184cdc1caefb2841f379033c1a30dd6b92a4657 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 21:48:09 +0000 Subject: [PATCH 6/8] typo --- onnxruntime/core/providers/cpu/nn/lp_norm.cc | 88 +++++++++++--------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc b/onnxruntime/core/providers/cpu/nn/lp_norm.cc index 2286800c9638b..190ef9ede34ff 100644 --- a/onnxruntime/core/providers/cpu/nn/lp_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/lp_norm.cc @@ -7,14 +7,22 @@ #include "core/providers/common.h" namespace
onnxruntime { +#define REGISTER_LPNORMALISATION_VERSIONED_KERNEL(type, sinceVersion, endVersion) \ + ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( \ + LpNormalization, sinceVersion, endVersion, type, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ + LpNorm); + #define REGISTER_LPNORMALISATION_KERNEL(type, sinceVersion) \ ONNX_CPU_OPERATOR_TYPED_KERNEL( \ LpNormalization, sinceVersion, type, \ KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), \ LpNorm); -REGISTER_LPNORMALISATION_KERNEL(float, 1) -REGISTER_LPNORMALISATION_KERNEL(double, 1) +REGISTER_LPNORMALISATION_VERSIONED_KERNEL(float, 1, 21) +REGISTER_LPNORMALISATION_VERSIONED_KERNEL(double, 1, 21) +REGISTER_LPNORMALISATION_KERNEL(float, 22) +REGISTER_LPNORMALISATION_KERNEL(double, 22) using InnerStride = Eigen::InnerStride; @@ -43,48 +51,46 @@ void DoNormalizeP2( // norm is zero, so set the result to zero yVec.setZero(); } - } -}; + }; -template -void DoNormalizeP1( - const T* xData, - T* yData, - const int64_t m, - const int64_t n, - const int64_t sf) { - for (int i = 0; i < n; ++i) { - auto base = (i / sf) * sf * m + (i % sf); - ConstStridedVec xVec(xData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); - StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); + template + void DoNormalizeP1( + const T* xData, + T* yData, + const int64_t m, + const int64_t n, + const int64_t sf) { + for (int i = 0; i < n; ++i) { + auto base = (i / sf) * sf * m + (i % sf); + ConstStridedVec xVec(xData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); + StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); - auto norm = xVec.template lpNorm<1>(); - if (norm != 0) { - yVec = xVec / norm; - } else { - // norm is zero - set the result to zero - yVec.setZero(); - } - } -}; + auto norm = xVec.template lpNorm<1>(); + if (norm != 0) { + yVec = xVec / norm; + } 
else { + // norm is zero, so set the result to zero + yVec.setZero(); + } + }; -template -Status LpNorm::Compute(OpKernelContext* p_op_kernel_context) const { - const auto* input = p_op_kernel_context->Input(0); - const TensorShape& input_shape = input->Shape(); - Tensor* output = p_op_kernel_context->Output(0, input_shape); + template + Status LpNorm::Compute(OpKernelContext * p_op_kernel_context) const { + const auto* input = p_op_kernel_context->Input(0); + const TensorShape& input_shape = input->Shape(); + Tensor* output = p_op_kernel_context->Output(0, input_shape); - const auto canonical_axis = HandleNegativeAxis(axis_, static_cast(input_shape.NumDimensions())); - const int64_t m = input_shape.GetDims()[onnxruntime::narrow(canonical_axis)]; - const int64_t n = input_shape.Size() / m; - const int64_t sf = input_shape.SizeFromDimension(SafeInt(canonical_axis) + 1); + const auto canonical_axis = HandleNegativeAxis(axis_, static_cast(input_shape.NumDimensions())); + const int64_t m = input_shape.GetDims()[onnxruntime::narrow(canonical_axis)]; + const int64_t n = input_shape.Size() / m; + const int64_t sf = input_shape.SizeFromDimension(SafeInt(canonical_axis) + 1); - if (p_ == 1) { - DoNormalizeP1(input->Data(), output->MutableData(), m, n, sf); - } else if (p_ == 2) { - DoNormalizeP2(input->Data(), output->MutableData(), m, n, sf); - } + if (p_ == 1) { + DoNormalizeP1(input->Data(), output->MutableData(), m, n, sf); + } else if (p_ == 2) { + DoNormalizeP2(input->Data(), output->MutableData(), m, n, sf); + } - return Status::OK(); -} -} // namespace onnxruntime + return Status::OK(); + } + } // namespace onnxruntime From a470e5a68b6320f1cb5f4cd3139c736a993a3603 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 21:52:06 +0000 Subject: [PATCH 7/8] formatting --- onnxruntime/core/providers/cpu/nn/lp_norm.cc | 76 ++++++++++---------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/onnxruntime/core/providers/cpu/nn/lp_norm.cc 
b/onnxruntime/core/providers/cpu/nn/lp_norm.cc index 190ef9ede34ff..03f85e8ea5705 100644 --- a/onnxruntime/core/providers/cpu/nn/lp_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/lp_norm.cc @@ -51,46 +51,48 @@ void DoNormalizeP2( // norm is zero, so set the result to zero yVec.setZero(); } - }; + } +}; - template - void DoNormalizeP1( - const T* xData, - T* yData, - const int64_t m, - const int64_t n, - const int64_t sf) { - for (int i = 0; i < n; ++i) { - auto base = (i / sf) * sf * m + (i % sf); - ConstStridedVec xVec(xData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); - StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); +template +void DoNormalizeP1( + const T* xData, + T* yData, + const int64_t m, + const int64_t n, + const int64_t sf) { + for (int i = 0; i < n; ++i) { + auto base = (i / sf) * sf * m + (i % sf); + ConstStridedVec xVec(xData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); + StridedVec yVec(yData + base, 1, onnxruntime::narrow(m), InnerStride(onnxruntime::narrow(sf))); - auto norm = xVec.template lpNorm<1>(); - if (norm != 0) { - yVec = xVec / norm; - } else { - // norm is zero, so set the result to zero - yVec.setZero(); - } - }; + auto norm = xVec.template lpNorm<1>(); + if (norm != 0) { + yVec = xVec / norm; + } else { + // norm is zero - set the result to zero + yVec.setZero(); + } + } +}; - template - Status LpNorm::Compute(OpKernelContext * p_op_kernel_context) const { - const auto* input = p_op_kernel_context->Input(0); - const TensorShape& input_shape = input->Shape(); - Tensor* output = p_op_kernel_context->Output(0, input_shape); +template +Status LpNorm::Compute(OpKernelContext* p_op_kernel_context) const { + const auto* input = p_op_kernel_context->Input(0); + const TensorShape& input_shape = input->Shape(); + Tensor* output = p_op_kernel_context->Output(0, input_shape); - const auto canonical_axis = HandleNegativeAxis(axis_, 
static_cast(input_shape.NumDimensions())); - const int64_t m = input_shape.GetDims()[onnxruntime::narrow(canonical_axis)]; - const int64_t n = input_shape.Size() / m; - const int64_t sf = input_shape.SizeFromDimension(SafeInt(canonical_axis) + 1); + const auto canonical_axis = HandleNegativeAxis(axis_, static_cast(input_shape.NumDimensions())); + const int64_t m = input_shape.GetDims()[onnxruntime::narrow(canonical_axis)]; + const int64_t n = input_shape.Size() / m; + const int64_t sf = input_shape.SizeFromDimension(SafeInt(canonical_axis) + 1); - if (p_ == 1) { - DoNormalizeP1(input->Data(), output->MutableData(), m, n, sf); - } else if (p_ == 2) { - DoNormalizeP2(input->Data(), output->MutableData(), m, n, sf); - } + if (p_ == 1) { + DoNormalizeP1(input->Data(), output->MutableData(), m, n, sf); + } else if (p_ == 2) { + DoNormalizeP2(input->Data(), output->MutableData(), m, n, sf); + } - return Status::OK(); - } - } // namespace onnxruntime + return Status::OK(); +} +} // namespace onnxruntime From 55bcaa9c17f1e18a3cd053bb38a20895f7bd44ef Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Tue, 27 Jan 2026 23:24:35 +0000 Subject: [PATCH 8/8] update the op doc --- docs/OperatorKernels.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 7cc57a636362f..08840c623b709 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -240,7 +240,8 @@ Do not modify directly.* |||[13, 15]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(double), tensor(float)| +|LpNormalization|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(double), tensor(float)| +|||[1, 21]|**T** = tensor(double), tensor(float)| |LpPool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| |||[18, 21]|**T** = tensor(float)| |||[11, 17]|**T** = tensor(float)|