From 7f32da235790990d3a85da07ac7eb198db1c5730 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 9 Mar 2026 21:34:17 +0000 Subject: [PATCH 01/18] Integrate ONNX 1.21.0: submodule, deps, patches, and build fixes Update ONNX submodule to rel-1.21.0 branch (commit fbbe45b8e2). Update cmake/deps.txt with new URL and SHA1. Update vcpkg port (portfile.cmake, vcpkg.json) for 1.21.0. Regenerate onnx.patch and binskim.patch for 1.21.0 CMakeLists.txt changes. Update all 7 requirements.txt files to onnx==1.21.0. Bump kMaxSupportedOpset from 25 to 26 in optimizer_api.h. Fix ONNX_UNUSED macro removal (replaced with [[maybe_unused]]) in contrib_defs.h, dml_defs.h, and test_opaque_api.cc. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/deps.txt | 2 +- cmake/external/onnx | 2 +- cmake/patches/onnx/onnx.patch | 16 ++++++++-------- cmake/vcpkg-ports/onnx/binskim.patch | 16 ++++++++-------- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- cmake/vcpkg-ports/onnx/vcpkg.json | 4 ++-- .../core/graph/contrib_ops/contrib_defs.h | 14 ++++++-------- onnxruntime/core/graph/dml_ops/dml_defs.h | 14 ++++++-------- .../transpose_optimization/optimizer_api.h | 2 +- onnxruntime/test/opaque_api/test_opaque_api.cc | 7 +++---- onnxruntime/test/python/requirements.txt | 2 +- .../aarch64/python/cpu/scripts/requirements.txt | 2 +- .../linux/docker/scripts/lort/requirements.txt | 2 +- .../docker/scripts/manylinux/requirements.txt | 2 +- .../github/linux/docker/scripts/requirements.txt | 2 +- .../github/linux/python/requirements.txt | 2 +- .../github/windows/python/requirements.txt | 2 +- 17 files changed, 45 insertions(+), 50 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index a5eaf2ed69efb..2d7196646434f 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 -onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.20.1.zip;30b80c81a1a381188896e86abe460c3c3f3091fd +onnx;https://github.com/onnx/onnx/archive/fbbe45b8e25b5b0018cc038caaf906d3b09634ee.zip;c38208d94ec0dd799a8468ac72f6058f74d44830 # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/external/onnx b/cmake/external/onnx index d3f6b795aedb4..fbbe45b8e25b5 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit d3f6b795aedb48eaecc881bf5e8f5dd6efbe25b3 +Subproject commit fbbe45b8e25b5b0018cc038caaf906d3b09634ee diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch index 76e94a71364bf..0a5680778790b 100644 --- a/cmake/patches/onnx/onnx.patch +++ b/cmake/patches/onnx/onnx.patch @@ -1,16 +1,16 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 584c0419a..5d4ffff99 100644 +index 044996e..ded7e39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -52,6 +52,7 @@ option(ONNX_USE_LITE_PROTO "Use lite protobuf instead of full." OFF) +@@ -53,6 +53,7 @@ option(ONNX_USE_LITE_PROTO "Use lite protobuf instead of full." OFF) option(ONNX_DISABLE_EXCEPTIONS "Disable exception handling." OFF) option(ONNX_DISABLE_STATIC_REGISTRATION "Disable static registration for ONNX operator schemas." 
OFF) option(ONNX_USE_UNITY_BUILD "Enable Unity (Jumbo) build for" OFF) +option(ONNX_MINIMAL_BUILD "Build only essential ONNX components" OFF) + option(ONNX_INSTALL "Install ONNX targets, headers, and CMake config files" ON) if(WIN32) option(ONNX_USE_MSVC_STATIC_RUNTIME "Build with MSVC static runtime" OFF) - endif() -@@ -397,14 +398,28 @@ relative_protobuf_generate_cpp(ONNX_PROTO_SRCS +@@ -399,14 +400,28 @@ relative_protobuf_generate_cpp(ONNX_PROTO_SRCS onnx/onnx-operators.in.proto onnx/onnx-data.in.proto) @@ -48,10 +48,10 @@ index 584c0419a..5d4ffff99 100644 set(LINKED_PROTOBUF_TARGET protobuf::libprotobuf) if(ONNX_USE_LITE_PROTO) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake -index 07f2b9071..388d9f7a3 100644 +index 1987edd..04b3088 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake -@@ -31,18 +31,7 @@ endfunction() +@@ -103,18 +103,7 @@ endfunction() function(add_onnx_compile_options target) if(MSVC) @@ -71,10 +71,10 @@ index 07f2b9071..388d9f7a3 100644 target_compile_options(${target} PRIVATE "/WX") endif() diff --git a/onnx/defs/nn/old.cc b/onnx/defs/nn/old.cc -index 887151217..ac2e8c463 100644 +index a6a8a83..153da87 100644 --- a/onnx/defs/nn/old.cc +++ b/onnx/defs/nn/old.cc -@@ -4152,7 +4152,6 @@ ONNX_OPERATOR_SET_SCHEMA( +@@ -4026,7 +4026,6 @@ ONNX_OPERATOR_SET_SCHEMA( GroupNormalization, 18, OpSchema() diff --git a/cmake/vcpkg-ports/onnx/binskim.patch b/cmake/vcpkg-ports/onnx/binskim.patch index 76e94a71364bf..0a5680778790b 100644 --- a/cmake/vcpkg-ports/onnx/binskim.patch +++ b/cmake/vcpkg-ports/onnx/binskim.patch @@ -1,16 +1,16 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 584c0419a..5d4ffff99 100644 +index 044996e..ded7e39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -52,6 +52,7 @@ option(ONNX_USE_LITE_PROTO "Use lite protobuf instead of full." OFF) +@@ -53,6 +53,7 @@ option(ONNX_USE_LITE_PROTO "Use lite protobuf instead of full." OFF) option(ONNX_DISABLE_EXCEPTIONS "Disable exception handling." 
OFF) option(ONNX_DISABLE_STATIC_REGISTRATION "Disable static registration for ONNX operator schemas." OFF) option(ONNX_USE_UNITY_BUILD "Enable Unity (Jumbo) build for" OFF) +option(ONNX_MINIMAL_BUILD "Build only essential ONNX components" OFF) + option(ONNX_INSTALL "Install ONNX targets, headers, and CMake config files" ON) if(WIN32) option(ONNX_USE_MSVC_STATIC_RUNTIME "Build with MSVC static runtime" OFF) - endif() -@@ -397,14 +398,28 @@ relative_protobuf_generate_cpp(ONNX_PROTO_SRCS +@@ -399,14 +400,28 @@ relative_protobuf_generate_cpp(ONNX_PROTO_SRCS onnx/onnx-operators.in.proto onnx/onnx-data.in.proto) @@ -48,10 +48,10 @@ index 584c0419a..5d4ffff99 100644 set(LINKED_PROTOBUF_TARGET protobuf::libprotobuf) if(ONNX_USE_LITE_PROTO) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake -index 07f2b9071..388d9f7a3 100644 +index 1987edd..04b3088 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake -@@ -31,18 +31,7 @@ endfunction() +@@ -103,18 +103,7 @@ endfunction() function(add_onnx_compile_options target) if(MSVC) @@ -71,10 +71,10 @@ index 07f2b9071..388d9f7a3 100644 target_compile_options(${target} PRIVATE "/WX") endif() diff --git a/onnx/defs/nn/old.cc b/onnx/defs/nn/old.cc -index 887151217..ac2e8c463 100644 +index a6a8a83..153da87 100644 --- a/onnx/defs/nn/old.cc +++ b/onnx/defs/nn/old.cc -@@ -4152,7 +4152,6 @@ ONNX_OPERATOR_SET_SCHEMA( +@@ -4026,7 +4026,6 @@ ONNX_OPERATOR_SET_SCHEMA( GroupNormalization, 18, OpSchema() diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index c526529466e97..ce95fa0e4535e 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF "v${VERSION}" - SHA512 4bbc4c09e4bb3eb6049d653ce49200564e8c5dcf1154a30f894f24e15f1986d1f2fe2f4ca32fe383c559e2a0b20681f33d649376bf63e4345df6972a2c78eac8 + REF fbbe45b8e25b5b0018cc038caaf906d3b09634ee + SHA512 
971a71b6d0fdb96270f82851c6a5940cc1c34d224247b678033ba179ffd8cc7bfecf59b235d013a0b94d089bd7d6fe46d01b2d6f5056bdb9fdff98fba0cc4e27 PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch diff --git a/cmake/vcpkg-ports/onnx/vcpkg.json b/cmake/vcpkg-ports/onnx/vcpkg.json index 5800b031aa143..5ad70ff409e05 100644 --- a/cmake/vcpkg-ports/onnx/vcpkg.json +++ b/cmake/vcpkg-ports/onnx/vcpkg.json @@ -1,7 +1,7 @@ { "name": "onnx", - "version-semver": "1.20.1", - "port-version": 1, + "version-semver": "1.21.0", + "port-version": 0, "description": "Open standard for machine learning interoperability", "homepage": "https://onnx.ai", "license": "Apache-2.0", diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.h b/onnxruntime/core/graph/contrib_ops/contrib_defs.h index 5b3904669f9fc..f88257b2baf08 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.h +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.h @@ -35,19 +35,17 @@ inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) { ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) +#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) \ + static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ + [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) #define ONNX_CONTRIB_OPERATOR_SCHEMA_ELSEWHERE(name, schema_func) \ ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func) #define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \ ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) -#define 
ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) +#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ + static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ + [[maybe_unused]] = schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) void RegisterContribSchemas(); void RegisterNchwcSchemas(); diff --git a/onnxruntime/core/graph/dml_ops/dml_defs.h b/onnxruntime/core/graph/dml_ops/dml_defs.h index 5479005382ec9..9551f72adfe17 100644 --- a/onnxruntime/core/graph/dml_ops/dml_defs.h +++ b/onnxruntime/core/graph/dml_ops/dml_defs.h @@ -11,19 +11,17 @@ namespace dml { MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) +#define MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) \ + static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ + [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) #define MS_DML_OPERATOR_SCHEMA_ELSEWHERE(name, schema_func) \ MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func) #define MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \ MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) -#define MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - 
schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) +#define MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ + static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ + [[maybe_unused]] = schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) void RegisterDmlSchemas(); } // namespace dml diff --git a/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h b/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h index 6ff4da05fbf57..012777897c3a1 100644 --- a/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h +++ b/onnxruntime/core/optimizer/transpose_optimization/optimizer_api.h @@ -466,7 +466,7 @@ class GraphRef { } // namespace api constexpr int64_t kMinSupportedOpset = 7; -constexpr int64_t kMaxSupportedOpset = 25; +constexpr int64_t kMaxSupportedOpset = 26; // enum of results that a CostCheckFn can return. enum class CostCheckResult { diff --git a/onnxruntime/test/opaque_api/test_opaque_api.cc b/onnxruntime/test/opaque_api/test_opaque_api.cc index da3ad08ae1ce2..e39e76a912aec 100644 --- a/onnxruntime/test/opaque_api/test_opaque_api.cc +++ b/onnxruntime/test/opaque_api/test_opaque_api.cc @@ -118,10 +118,9 @@ ONNX_OPERATOR_KERNEL_EX( ONNX_TEST_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define ONNX_TEST_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) \ - static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce( \ - op_schema_register_once##name##Counter) ONNX_UNUSED = \ - ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) +#define ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) \ + static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ + [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) static void RegisterCustomKernel() { // Register our custom type diff --git 
a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index 8ddba809b9228..3ece2f39d4042 100644 --- a/onnxruntime/test/python/requirements.txt +++ b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.20.1 +onnx==1.21.0 pytest onnx-ir diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index 7e2b6e74cfdde..b4c2f163e22ac 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.20.1 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index 63a8e96d8c128..eb52681341012 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.20.1 +onnx==1.21.0 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index ffcad5ee67208..9a0a6d0f51900 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.20.1 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index ad57cc715589b..3d886832e1ccb 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ 
b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.20.1 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 994776e8fb6fd..bfe9ab0d8a508 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 +12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.20.1 +onnx==1.21.0 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index 83593ff47e453..2dfba37c6f381 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.20.1 +onnx==1.21.0 From 5023110b8b65e098dd4cbe75bd8a146f254e8983 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 9 Mar 2026 21:34:41 +0000 Subject: [PATCH 02/18] Add BitCast and CumProd CPU kernels for ONNX opset 26 BitCast (opset 26): Zero-copy tensor type reinterpretation for types with matching bit-widths. Supports all standard numeric types. Registered in cpu_execution_provider.cc with 17 passing tests. CumProd (opset 26): Cumulative product along a given axis with optional exclusive and reverse attributes. Supports float, double, int32, int64, uint32, uint64. Identity element is 1 (vs 0 for CumSum). Registered in cpu_execution_provider.cc with 33 passing tests. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../providers/cpu/cpu_execution_provider.cc | 18 + .../core/providers/cpu/math/cumprod.cc | 222 ++++++++++ onnxruntime/core/providers/cpu/math/cumprod.h | 28 ++ .../core/providers/cpu/tensor/bitcast_op.cc | 78 ++++ .../core/providers/cpu/tensor/bitcast_op.h | 21 + .../test/providers/cpu/math/cumprod_test.cc | 409 ++++++++++++++++++ .../providers/cpu/tensor/bitcast_op_test.cc | 183 ++++++++ 7 files changed, 959 insertions(+) create mode 100644 onnxruntime/core/providers/cpu/math/cumprod.cc create mode 100644 onnxruntime/core/providers/cpu/math/cumprod.h create mode 100644 onnxruntime/core/providers/cpu/tensor/bitcast_op.cc create mode 100644 onnxruntime/core/providers/cpu/tensor/bitcast_op.h create mode 100644 onnxruntime/test/providers/cpu/math/cumprod_test.cc create mode 100644 onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 74b8f8e468097..f7b4918546067 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -1497,6 +1497,15 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 25, Un class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 25, Scan); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 25, Size); +// Opset 26 +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, BitCast); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, float, CumProd); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, double, CumProd); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, int32_t, CumProd); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, int64_t, 
CumProd); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, uint32_t, CumProd); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 26, uint64_t, CumProd); + // !!PLEASE READ BELOW!! Following that, add new entries above this comment /* *** IMPORTANT! *** @@ -3661,6 +3670,15 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + + // opset 26 + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { KernelCreateInfo info = function_table_entry(); diff --git a/onnxruntime/core/providers/cpu/math/cumprod.cc b/onnxruntime/core/providers/cpu/math/cumprod.cc new file mode 100644 index 0000000000000..6706c2d3ea8d0 --- /dev/null +++ b/onnxruntime/core/providers/cpu/math/cumprod.cc @@ -0,0 +1,222 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include + +#include "cumprod.h" +#include "core/providers/common.h" +#include "core/providers/cpu/tensor/utils.h" +#include "core/framework/op_kernel.h" +#include "core/framework/tensorprotoutils.h" + +using namespace onnxruntime; + +namespace onnxruntime { + +namespace cumprod_op { +Status GetAxis(const Tensor* axis_tensor, int64_t input_rank, int64_t& axis_out) { + if (!axis_tensor) + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Axis tensor must be provided to the CumProd op"); + + if (axis_tensor->Shape().NumDimensions() > 1) + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Axis tensor should be 0D or 1D"); + + if (axis_tensor->IsDataType()) { + axis_out = static_cast(axis_tensor->Data()[0]); + } else if (axis_tensor->IsDataType()) { + axis_out = axis_tensor->Data()[0]; + } else { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Axis tensor should be of type `int32_t` or `int64_t`"); + } + + axis_out = HandleNegativeAxis(axis_out, input_rank); + + return Status::OK(); +} + +} // namespace cumprod_op + +// Opset 26 kernels +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + float, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + double, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + int32_t, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + int64_t, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", 
std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + uint32_t, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +ONNX_CPU_OPERATOR_TYPED_KERNEL( + CumProd, + 26, + uint64_t, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}), + CumProd); + +template +CumProd::CumProd(const OpKernelInfo& info) : OpKernel(info), exclusive_(), reverse_() { + int64_t exclusive = 0; + auto status = info.GetAttr("exclusive", &exclusive); + if (status.IsOK()) { + if (exclusive == 1 || exclusive == 0) { + exclusive_ = exclusive; + } else { + ORT_ENFORCE(false, "attribute exclusive can only be 0 or 1"); + } + } + int64_t reverse = 0; + status = info.GetAttr("reverse", &reverse); + if (status.IsOK()) { + if (reverse == 1 || reverse == 0) { + reverse_ = reverse; + } else { + ORT_ENFORCE(false, "attribute reverse can only be 0 or 1"); + } + } +} + +template +Status CumProd::Compute(OpKernelContext* ctx) const { + const Tensor* input = ctx->Input(0); + size_t rank = input->Shape().NumDimensions(); + if (rank == 0) + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Cannot apply CumProd operator on a scalar"); + + const Tensor* axis_tensor = ctx->Input(1); + + TensorShape output_shape(input->Shape()); + auto& output_tensor = *ctx->Output(0, output_shape); + + if (output_shape.Size() == 0) + return Status::OK(); + + int64_t axis_input = 0; + ORT_THROW_IF_ERROR(cumprod_op::GetAxis(axis_tensor, rank, axis_input)); + + // We solve the problem by using the identity that (in the case of exclusive) + // 1) out[upper_dims...][0][lower_dims...] = 1 + // 2) out[upper_dims...][i][lower_dims...] = + // in[upper_dims...][i-1][lower_dims...] 
* out[upper_dims...][i-1][lower_dims...] + // We loop through the [upper_dims...] and start applying the identity in each slice. + // Since the [lower_dims...] are adjacent in memory, we can multiply them like vectors. + + const auto input_shape = input->Shape().GetDims(); + const size_t axis = onnxruntime::narrow(axis_input); + const int64_t dim = input->Shape()[axis]; // dimension size for the axis + const int64_t upper_dim_count = // number of slices we can walk through iteratively + std::accumulate(input_shape.begin(), input_shape.begin() + axis, static_cast(1), std::multiplies()); + const int64_t lower_dim_size = // sizes of the slices we can treat as 1D arrays + std::accumulate(input_shape.begin() + axis + 1, input_shape.end(), static_cast(1), std::multiplies()); + + if (!reverse_) { + const auto* input_iter = input->Data(); + auto* output_iter = output_tensor.MutableData(); + const auto* prev_output_iter = output_iter; + + if (exclusive_) { + for (int64_t outer = 0; outer < upper_dim_count; outer++) { + prev_output_iter = output_iter; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + *(output_iter++) = static_cast(1); + } + for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + *(output_iter++) = *(prev_output_iter++) * *(input_iter++); + } + } + input_iter += lower_dim_size; + } + } else { + for (int64_t outer = 0; outer < upper_dim_count; outer++) { + prev_output_iter = output_iter; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + *(output_iter++) = *(input_iter++); + } + for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + *(output_iter++) = *(prev_output_iter++) * *(input_iter++); + } + } + } + } + } else { + const auto* input_iter = input->Data() + input->Shape().Size(); + auto* output_iter = output_tensor.MutableData() + output_shape.Size(); + const auto* prev_output_iter = output_iter; + + if 
(exclusive_) { + for (int64_t outer = upper_dim_count - 1; outer >= 0; outer--) { + prev_output_iter = output_iter; + for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { + *(--output_iter) = static_cast(1); + } + for (int64_t cum_axis = dim - 1; cum_axis > 0; cum_axis--) { + for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { + *(--output_iter) = *(--prev_output_iter) * *(--input_iter); + } + } + input_iter -= lower_dim_size; + } + } else { + for (int64_t outer = upper_dim_count - 1; outer >= 0; outer--) { + prev_output_iter = output_iter; + for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { + *(--output_iter) = *(--input_iter); + } + for (int64_t cum_axis = dim - 1; cum_axis > 0; cum_axis--) { + for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { + *(--output_iter) = *(--prev_output_iter) * *(--input_iter); + } + } + } + } + } + + return Status::OK(); +} + +}; // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/math/cumprod.h b/onnxruntime/core/providers/cpu/math/cumprod.h new file mode 100644 index 0000000000000..9b8c6a83cc187 --- /dev/null +++ b/onnxruntime/core/providers/cpu/math/cumprod.h @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/common/common.h" +#include "core/framework/op_kernel.h" + +namespace onnxruntime { + +template +class CumProd final : public OpKernel { + public: + explicit CumProd(const OpKernelInfo& op_kernel_info); + + Status Compute(OpKernelContext* p_op_kernel_context) const override; + + private: + int64_t exclusive_; + int64_t reverse_; +}; + +namespace cumprod_op { + +Status GetAxis(const Tensor* axis_tensor, int64_t input_rank, int64_t& axis_out); + +} // namespace cumprod_op +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc new file mode 100644 index 0000000000000..9d9fa3e0c462b --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "bitcast_op.h" +#include "core/framework/op_kernel.h" +#include "core/framework/tensor.h" + +#include + +namespace onnxruntime { + +ONNX_CPU_OPERATOR_KERNEL( + BitCast, + 26, + KernelDefBuilder() + .TypeConstraint("T1", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .MayInplace(0, 0), + BitCast); + 
+BitCast::BitCast(const OpKernelInfo& info) : OpKernel(info) { + int64_t to; + Status status = info.GetAttr("to", &to); + ORT_ENFORCE(status.IsOK(), "Attribute 'to' is not set."); + to_ = gsl::narrow_cast(to); +} + +Status BitCast::Compute(OpKernelContext* context) const { + const Tensor* input = context->Input(0); + ORT_ENFORCE(input != nullptr, "BitCast: input tensor is null."); + + const size_t input_element_size = input->DataType()->Size(); + + const auto* output_type = DataTypeImpl::TensorTypeFromONNXEnum(to_); + const size_t output_element_size = output_type->GetElementType()->Size(); + + ORT_RETURN_IF_NOT(input_element_size == output_element_size, + "BitCast requires input and output types to have the same bit-width. ", + "Input element size: ", input_element_size, " bytes, ", + "output element size: ", output_element_size, " bytes."); + + Tensor* output = context->Output(0, input->Shape()); + + const size_t num_bytes = input->SizeInBytes(); + if (num_bytes > 0) { + const void* src = input->DataRaw(); + void* dst = output->MutableDataRaw(); + if (src != dst) { + std::memcpy(dst, src, num_bytes); + } + } + + return Status::OK(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/tensor/bitcast_op.h b/onnxruntime/core/providers/cpu/tensor/bitcast_op.h new file mode 100644 index 0000000000000..dd5bdecde56c4 --- /dev/null +++ b/onnxruntime/core/providers/cpu/tensor/bitcast_op.h @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include "core/common/common.h" +#include "core/framework/op_kernel.h" +#include "core/graph/onnx_protobuf.h" + +namespace onnxruntime { + +class BitCast final : public OpKernel { + public: + explicit BitCast(const OpKernelInfo& info); + Status Compute(OpKernelContext* context) const override; + + private: + ONNX_NAMESPACE::TensorProto_DataType to_; +}; + +} // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/math/cumprod_test.cc b/onnxruntime/test/providers/cpu/math/cumprod_test.cc new file mode 100644 index 0000000000000..3f9596e43aa40 --- /dev/null +++ b/onnxruntime/test/providers/cpu/math/cumprod_test.cc @@ -0,0 +1,409 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" +#include "test/util/include/default_providers.h" +#include "core/util/math.h" + +namespace onnxruntime { +namespace test { + +// 1D tests - basic functionality +TEST(CumProdTest, _1DTest) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1.f, 2.f, 6.f, 24.f, 120.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestInvalidAxis) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {-3}); + test.AddOutput("y", {5}, {1.f, 2.f, 6.f, 24.f, 120.f}); + test.Run(OpTester::ExpectResult::kExpectFailure, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestNegAxis) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {-1}); + test.AddOutput("y", {5}, {1.f, 2.f, 6.f, 24.f, 120.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} 
+ +// Exclusive mode: identity element is 1, shift right +// input: [1, 2, 3, 4, 5] +// output: [1, 1, 2, 6, 24] +TEST(CumProdTest, _1DTestExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1.f, 1.f, 2.f, 6.f, 24.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Exclusive with axis dim=1: all elements should be identity (1) +TEST(CumProdTest, _1DTestExclusiveAxisHasSingleValue) { + { + // forward + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {1, 2}, {3.f, 4.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {1, 2}, {1.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); + } + { + // reverse + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddAttribute("reverse", 1); + test.AddInput("x", {1, 2}, {3.f, 4.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {1, 2}, {1.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); + } +} + +// 2D tests +// input: [[1, 2, 3], [4, 5, 6]], axis=0 +// output: [[1, 2, 3], [4, 10, 18]] +TEST(CumProdTest, _2DTestAxis0) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {2, 3}, {1.f, 2.f, 3.f, 4.f, 10.f, 18.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// input: [[1, 2, 3], [4, 5, 6]], axis=1 +// output: [[1, 2, 6], [4, 20, 120]] +TEST(CumProdTest, _2DTestAxis1) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f}); + test.AddInput("axis", {}, {1}); + 
test.AddOutput("y", {2, 3}, {1.f, 2.f, 6.f, 4.f, 20.f, 120.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Exclusive 2D axis=0: identity row, then element-wise product with input +// input: [[1, 2, 3], [4, 5, 6]], axis=0, exclusive +// output: [[1, 1, 1], [1, 2, 3]] +TEST(CumProdTest, _2DTestExclusiveAxis0) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {2, 3}, {1.f, 1.f, 1.f, 1.f, 2.f, 3.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Exclusive 2D axis=1 +// input: [[1, 2, 3], [4, 5, 6]], axis=1, exclusive +// output: [[1, 1, 2], [1, 4, 20]] +TEST(CumProdTest, _2DTestExclusiveAxis1) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3}, {1.f, 2.f, 3.f, 4.f, 5.f, 6.f}); + test.AddInput("axis", {}, {1}); + test.AddOutput("y", {2, 3}, {1.f, 1.f, 2.f, 1.f, 4.f, 20.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// 3D tests with shape {2, 3, 4} +// Using values 1..24 +TEST(CumProdTest, _3DTestAxis0) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {0}); + // axis=0: product along first dimension + // out[0,:,:] = x[0,:,:], out[1,:,:] = x[0,:,:] * x[1,:,:] + test.AddOutput("y", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 28.f, 45.f, 64.f, 85.f, 108.f, 133.f, 160.f, 189.f, 220.f, 253.f, 288.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis1) { + OpTester 
test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {1}); + // axis=1: product along second dimension + // out[:,0,:] = x[:,0,:], out[:,1,:] = x[:,0,:]*x[:,1,:], out[:,2,:] = x[:,0,:]*x[:,1,:]*x[:,2,:] + test.AddOutput("y", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 12.f, 21.f, 32.f, 45.f, 120.f, 231.f, 384.f, + 13.f, 14.f, 15.f, 16.f, 221.f, 252.f, 285.f, 320.f, 4641.f, 5544.f, 6555.f, 7680.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis2) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {2}); + // axis=2: product along last dimension + // out[:,:,0] = x[:,:,0], out[:,:,1] = x[:,:,0]*x[:,:,1], etc. 
+ test.AddOutput("y", {2, 3, 4}, + {1.f, 2.f, 6.f, 24.f, 5.f, 30.f, 210.f, 1680.f, 9.f, 90.f, 990.f, 11880.f, + 13.f, 182.f, 2730.f, 43680.f, 17.f, 306.f, 5814.f, 116280.f, 21.f, 462.f, 10626.f, 255024.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// 3D exclusive tests +TEST(CumProdTest, _3DTestAxis0Exclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {0}); + // exclusive axis=0: first slice is all 1s, second = x[0,:,:] + test.AddOutput("y", {2, 3, 4}, + {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, + 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis1Exclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {1}); + // exclusive axis=1: out[:,0,:] = 1, out[:,1,:] = x[:,0,:], out[:,2,:] = x[:,0,:]*x[:,1,:] + test.AddOutput("y", {2, 3, 4}, + {1.f, 1.f, 1.f, 1.f, 1.f, 2.f, 3.f, 4.f, 5.f, 12.f, 21.f, 32.f, + 1.f, 1.f, 1.f, 1.f, 13.f, 14.f, 15.f, 16.f, 221.f, 252.f, 285.f, 320.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis2Exclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 
24.f}); + test.AddInput("axis", {}, {2}); + // exclusive axis=2: out[:,:,0] = 1, out[:,:,1] = x[:,:,0], out[:,:,2] = x[:,:,0]*x[:,:,1], etc. + test.AddOutput("y", {2, 3, 4}, + {1.f, 1.f, 2.f, 6.f, 1.f, 5.f, 30.f, 210.f, 1.f, 9.f, 90.f, 990.f, + 1.f, 13.f, 182.f, 2730.f, 1.f, 17.f, 306.f, 5814.f, 1.f, 21.f, 462.f, 10626.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Reverse tests +// input: [1, 2, 3, 4, 5], reverse +// output: [120, 120, 60, 20, 5] +TEST(CumProdTest, _1DTestReverse) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {120.f, 120.f, 60.f, 20.f, 5.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Reverse exclusive +// input: [1, 2, 3, 4, 5], reverse, exclusive +// output: [120, 60, 20, 5, 1] +TEST(CumProdTest, _1DTestReverseExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddAttribute("reverse", 1); + test.AddInput("x", {5}, {1.f, 2.f, 3.f, 4.f, 5.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {120.f, 60.f, 20.f, 5.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// 3D reverse tests +TEST(CumProdTest, _3DTestAxis0Reverse) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {0}); + // reverse axis=0: out[1,:,:]=x[1,:,:], out[0,:,:]=x[0,:,:]*x[1,:,:] + test.AddOutput("y", {2, 3, 4}, + {13.f, 28.f, 45.f, 64.f, 85.f, 108.f, 133.f, 160.f, 189.f, 220.f, 253.f, 288.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 
22.f, 23.f, 24.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis1Reverse) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {1}); + // reverse axis=1: out[:,2,:]=x[:,2,:], out[:,1,:]=x[:,1,:]*x[:,2,:], out[:,0,:]=x[:,0,:]*x[:,1,:]*x[:,2,:] + test.AddOutput("y", {2, 3, 4}, + {45.f, 120.f, 231.f, 384.f, 45.f, 60.f, 77.f, 96.f, 9.f, 10.f, 11.f, 12.f, + 4641.f, 5544.f, 6555.f, 7680.f, 357.f, 396.f, 437.f, 480.f, 21.f, 22.f, 23.f, 24.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis2Reverse) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {2}); + // reverse axis=2: out[:,:,3]=x[:,:,3], out[:,:,2]=x[:,:,2]*x[:,:,3], etc. 
+ test.AddOutput("y", {2, 3, 4}, + {24.f, 24.f, 12.f, 4.f, 1680.f, 336.f, 56.f, 8.f, 11880.f, 1320.f, 132.f, 12.f, + 43680.f, 3360.f, 240.f, 16.f, 116280.f, 6840.f, 380.f, 20.f, 255024.f, 12144.f, 552.f, 24.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// 3D reverse exclusive tests +TEST(CumProdTest, _3DTestAxis0ReverseExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {0}); + // reverse exclusive axis=0: out[1,:,:]=1, out[0,:,:]=x[1,:,:] + test.AddOutput("y", {2, 3, 4}, + {13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, + 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis1ReverseExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {1}); + // reverse exclusive axis=1: out[:,2,:]=1, out[:,1,:]=x[:,2,:], out[:,0,:]=x[:,1,:]*x[:,2,:] + test.AddOutput("y", {2, 3, 4}, + {45.f, 60.f, 77.f, 96.f, 9.f, 10.f, 11.f, 12.f, 1.f, 1.f, 1.f, 1.f, + 357.f, 396.f, 437.f, 480.f, 21.f, 22.f, 23.f, 24.f, 1.f, 1.f, 1.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _3DTestAxis2ReverseExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddAttribute("exclusive", 1); + 
test.AddInput("x", {2, 3, 4}, + {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, + 13.f, 14.f, 15.f, 16.f, 17.f, 18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f}); + test.AddInput("axis", {}, {2}); + // reverse exclusive axis=2: out[:,:,3]=1, out[:,:,2]=x[:,:,3], out[:,:,1]=x[:,:,2]*x[:,:,3], etc. + test.AddOutput("y", {2, 3, 4}, + {24.f, 12.f, 4.f, 1.f, 336.f, 56.f, 8.f, 1.f, 1320.f, 132.f, 12.f, 1.f, + 3360.f, 240.f, 16.f, 1.f, 6840.f, 380.f, 20.f, 1.f, 12144.f, 552.f, 24.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Type-specific tests +TEST(CumProdTest, _1DTestInt32) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1, 2, 3, 4, 5}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1, 2, 6, 24, 120}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestInt64) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1, 2, 3, 4, 5}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1, 2, 6, 24, 120}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestDouble) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1., 2., 3., 4., 5.}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1., 2., 6., 24., 120.}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestDouble_WithInt64Axis) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1., 2., 3., 4., 5.}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1., 2., 6., 24., 120.}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestUint32) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + 
test.AddInput("x", {5}, {1, 2, 3, 4, 5}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1, 2, 6, 24, 120}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +TEST(CumProdTest, _1DTestUint64) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {5}, {1, 2, 3, 4, 5}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {5}, {1, 2, 6, 24, 120}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Matches ONNX spec example exactly +// input: [1, 2, 3], axis=0 -> [1, 2, 6] +TEST(CumProdTest, _OnnxSpecExample) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddInput("x", {3}, {1.f, 2.f, 3.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {3}, {1.f, 2.f, 6.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// ONNX spec example: exclusive=1 -> [1, 1, 2] +TEST(CumProdTest, _OnnxSpecExampleExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddInput("x", {3}, {1.f, 2.f, 3.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {3}, {1.f, 1.f, 2.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// ONNX spec example: reverse=1 -> [6, 6, 3] +TEST(CumProdTest, _OnnxSpecExampleReverse) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("reverse", 1); + test.AddInput("x", {3}, {1.f, 2.f, 3.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {3}, {6.f, 6.f, 3.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// ONNX spec example: exclusive=1, reverse=1 -> [6, 3, 1] +TEST(CumProdTest, _OnnxSpecExampleReverseExclusive) { + OpTester test("CumProd", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("exclusive", 1); + test.AddAttribute("reverse", 1); + test.AddInput("x", {3}, {1.f, 
2.f, 3.f}); + test.AddInput("axis", {}, {0}); + test.AddOutput("y", {3}, {6.f, 3.f, 1.f}); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +} // namespace test +} // namespace onnxruntime diff --git a/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc b/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc new file mode 100644 index 0000000000000..d2c674b1081b2 --- /dev/null +++ b/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc @@ -0,0 +1,183 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "gtest/gtest.h" +#include "test/providers/provider_test_utils.h" +#include "core/framework/to_tensor_proto_element_type.h" + +#include +#include + +namespace onnxruntime { +namespace test { + +template +void TestBitCastOp(const std::vector& shape, + const std::vector& input, + const std::vector& expected_output) { + OpTester test("BitCast", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("to", utils::ToTensorProtoElementType()); + test.AddInput("input", shape, input); + test.AddOutput("output", shape, expected_output); + // BitCast is CPU-only for now; exclude providers that don't support it. + test.Run(OpTester::ExpectResult::kExpectSuccess, "", + {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); +} + +// float32 and int32 are both 4 bytes. 
+// IEEE 754: 1.0f = 0x3F800000 = 1065353216 as int32 +TEST(BitCastTest, Float32ToInt32) { + std::vector input = {0.0f, 1.0f, -1.0f, 0.5f}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(float)); + + TestBitCastOp({4}, input, expected); +} + +TEST(BitCastTest, Int32ToFloat32) { + // 0x3F800000 = 1065353216 -> 1.0f + // 0x40000000 = 1073741824 -> 2.0f + std::vector input = {0, 1065353216, 1073741824}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(int32_t)); + + TestBitCastOp({3}, input, expected); +} + +// double and int64 are both 8 bytes. +TEST(BitCastTest, DoubleToInt64) { + std::vector input = {0.0, 1.0, -1.0}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(double)); + + TestBitCastOp({3}, input, expected); +} + +TEST(BitCastTest, Int64ToDouble) { + std::vector input = {0, 4607182418800017408}; // 0 and 1.0 as int64 + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(int64_t)); + + TestBitCastOp({2}, input, expected); +} + +// float16 and uint16 are both 2 bytes. +TEST(BitCastTest, Float16ToUInt16) { + std::vector input = {MLFloat16(0.0f), MLFloat16(1.0f), MLFloat16(0.5f)}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(MLFloat16)); + + TestBitCastOp({3}, input, expected); +} + +TEST(BitCastTest, UInt16ToFloat16) { + std::vector input = {0x0000, 0x3C00, 0x3800}; // 0.0, 1.0, 0.5 in float16 + std::vector expected; + expected.reserve(input.size()); + for (auto v : input) { + expected.push_back(MLFloat16::FromBits(v)); + } + + TestBitCastOp({3}, input, expected); +} + +// BFloat16 and int16 are both 2 bytes. 
+TEST(BitCastTest, BFloat16ToInt16) { + std::vector input = {BFloat16(0.0f), BFloat16(1.0f)}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(BFloat16)); + + TestBitCastOp({2}, input, expected); +} + +// int8 and uint8 are both 1 byte. +TEST(BitCastTest, Int8ToUInt8) { + std::vector input = {0, 1, -1, 127, -128}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(int8_t)); + + TestBitCastOp({5}, input, expected); +} + +TEST(BitCastTest, UInt8ToInt8) { + std::vector input = {0, 1, 127, 128, 255}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(uint8_t)); + + TestBitCastOp({5}, input, expected); +} + +// Same type (identity-like). +TEST(BitCastTest, Float32ToFloat32) { + std::vector input = {1.0f, 2.0f, 3.0f}; + TestBitCastOp({3}, input, input); +} + +// Multi-dimensional input. +TEST(BitCastTest, Float32ToInt32_2D) { + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(float)); + + TestBitCastOp({2, 3}, input, expected); +} + +TEST(BitCastTest, Float32ToInt32_3D) { + std::vector input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(float)); + + TestBitCastOp({2, 2, 3}, input, expected); +} + +// Empty tensor. +TEST(BitCastTest, EmptyTensor) { + std::vector input = {}; + std::vector expected = {}; + TestBitCastOp({0}, input, expected); +} + +// Scalar (0-dim) tensor. 
+TEST(BitCastTest, ScalarTensor) { + std::vector input = {42.0f}; + std::vector expected(1); + std::memcpy(expected.data(), input.data(), sizeof(float)); + + OpTester test("BitCast", 26, onnxruntime::kOnnxDomain); + test.AddAttribute("to", utils::ToTensorProtoElementType()); + test.AddInput("input", {}, input); + test.AddOutput("output", {}, expected); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", + {kTensorrtExecutionProvider, kOpenVINOExecutionProvider}); +} + +// uint32 and float32 (same size, 4 bytes). +TEST(BitCastTest, UInt32ToFloat32) { + std::vector input = {0, 0x3F800000, 0x40000000}; // 0.0f, 1.0f, 2.0f + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(uint32_t)); + + TestBitCastOp({3}, input, expected); +} + +// uint64 and double (same size, 8 bytes). +TEST(BitCastTest, UInt64ToDouble) { + std::vector input = {0, 0x3FF0000000000000ULL}; // 0.0 and 1.0 as uint64 + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(uint64_t)); + + TestBitCastOp({2}, input, expected); +} + +// int16 and uint16 (same size, 2 bytes). +TEST(BitCastTest, Int16ToUInt16) { + std::vector input = {0, 1, -1, 32767, -32768}; + std::vector expected(input.size()); + std::memcpy(expected.data(), input.data(), input.size() * sizeof(int16_t)); + + TestBitCastOp({5}, input, expected); +} + +} // namespace test +} // namespace onnxruntime From f2bc8bf95efd6a3fb3e48e7f759b097b64023f5f Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 9 Mar 2026 21:34:49 +0000 Subject: [PATCH 03/18] Regenerate OperatorKernels.md for ONNX opset 26 Add BitCast and CumProd entries to the CPU provider kernel documentation. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/OperatorKernels.md | 446 +--------------------------------------- 1 file changed, 4 insertions(+), 442 deletions(-) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index abdcc81586909..10fc961865314 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -6,7 +6,6 @@ Do not modify directly.* - [CPUExecutionProvider](#cpuexecutionprovider) - [CUDAExecutionProvider](#cudaexecutionprovider) -- [DmlExecutionProvider](#dmlexecutionprovider) --------------- @@ -54,6 +53,7 @@ Do not modify directly.* |||14|**T** = tensor(double), tensor(float)
**U** = tensor(double), tensor(float)|
 |||[9, 13]|**T** = tensor(double), tensor(float)|
 |||[7, 8]|**T** = tensor(double), tensor(float)|
+|BitCast|*in* input:**T1**<br/>*out* output:**T2**|26+|**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/>**T2** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
 |BitShift|*in* X:**T**
*in* Y:**T**
*out* Z:**T**|11+|**T** = tensor(uint32), tensor(uint64), tensor(uint8)| |BitwiseAnd|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |BitwiseNot|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| @@ -99,6 +99,7 @@ Do not modify directly.* |Cosh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| |||[9, 21]|**T** = tensor(float)| |Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)|
+|CumProd|*in* x:**T**<br/>*in* axis:**T2**<br/>*out* y:**T**|26+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)<br/>**T2** = tensor(int32), tensor(int64)|
 |CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |DFT|*in* input:**T1**
*in* dft_length:**T2**
*in* axis:**tensor(int64)**
*out* output:**T1**

or

*in* input:**T1**
*in* dft_length:**T2**
*out* output:**T1**|20+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| @@ -655,7 +656,8 @@ Do not modify directly.* |ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(float16)| |||12|**T** = tensor(double), tensor(float), tensor(float16)| |||[1, 11]|**T** = tensor(double), tensor(float), tensor(float16)| -|Attention|*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*in* nonpad_kv_seqlen:**tensor(int64)**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**

or

*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**|23+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| +|Attention|*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*in* nonpad_kv_seqlen:**tensor(int64)**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**

or

*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**|24+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| +|||23|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| |AveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)| |||[19, 21]|**T** = tensor(double), tensor(float), tensor(float16)| |||[11, 18]|**T** = tensor(double), tensor(float), tensor(float16)| @@ -1077,443 +1079,3 @@ Do not modify directly.* |||[1, 12]|**T** = tensor(double), tensor(float), tensor(float16)| | | | | - - - - -## Operators implemented by DmlExecutionProvider - -| Op Name | Parameters | OpSet Version | Types Supported | -|---------|------------|---------------|-----------------| -|**Operator Domain:** *ai.onnx*|||| -|Abs|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| -|||6+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| -|Acos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Acosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| -|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Affine|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|And|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| -|ArgMax|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Asin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Asinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| -|Atan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Atanh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| -|AveragePool|*in* X:**T**
*out* Y:**T**|19+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||10+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|BatchNormalization|*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* input_mean:**U**
*in* input_var:**U**
*out* Y:**T**
*out* running_mean:**U**
*out* running_var:**U**

or

*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* mean:**T**
*in* var:**T**
*out* Y:**T**
*out* mean:**T**
*out* var:**T**
*out* saved_mean:**T**
*out* saved_var:**T**

or

*in* X:**T**
*in* scale:**T1**
*in* B:**T1**
*in* input_mean:**T2**
*in* input_var:**T2**
*out* Y:**T**
*out* running_mean:**T2**
*out* running_var:**T2**|15+|**T** = tensor(float), tensor(float16)| -|||14+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|BitShift|*in* X:**T**
*in* Y:**T**
*out* Z:**T**|11+|**T** = tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|BitwiseAnd|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|BitwiseNot|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|BitwiseOr|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|BitwiseXor|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Cast|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||19+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||6+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|CastLike|*in* input:**T1**
*in* target_type:**T2**
*out* output:**T2**|21+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||19+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||15+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Ceil|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Celu|*in* X:**T**
*out* Y:**T**|12+|**T** = tensor(float), tensor(float16)| -|Clip|*in* input:**T**
*in* min:**T**
*in* max:**T**
*out* output:**T**

or

*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Col2Im|*in* input:**T**
*in* image_shape:**tensor(int64)**
*in* block_shape:**tensor(int64)**
*out* output:**T**|18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Concat|*in* inputs:**T**
*out* concat_result:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||4+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|ConcatFromSequence|*in* input_sequence:**S**
*out* concat_result:**T**|11+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|ConstantOfShape|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(int64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**T1** = tensor(int64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ConvInteger|*in* x:**T1**
*in* w:**T2**
*in* x_zero_point:**T1**
*in* w_zero_point:**T2**
*out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int32)| -|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|Cos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Cosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| -|Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|DFT|*in* input:**T1**
*in* dft_length:**T2**
*in* axis:**tensor(int64)**
*out* output:**T1**

or

*in* input:**T1**
*in* dft_length:**T2**
*out* output:**T1**|20+|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| -|||17+|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| -|DepthToSpace|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|DequantizeLinear|*in* x:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*out* y:**tensor(float)**

or

*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**

or

*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T3**|21+|**T1** = tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||19+|**T1** = tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||13+|**T** = tensor(int32), tensor(int8), tensor(uint8)| -|||10+|**T** = tensor(int32), tensor(int8), tensor(uint8)| -|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|7+|**T** = tensor(float), tensor(float16)| -|DynamicQuantizeLinear|*in* x:**T1**
*out* y:**T2**
*out* y_scale:**tensor(float)**
*out* y_zero_point:**T2**|11+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| -|Einsum|*in* Inputs:**T**
*out* Output:**T**|12+|**T** = tensor(float), tensor(float16)| -|Elu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| -|Equal|*in* A:**T**
*in* B:**T**
*out* C:**T1**|19+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| -|Erf|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|Exp|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Expand|*in* input:**T**
*in* shape:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||8+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|EyeLike|*in* input:**T1**
*out* output:**T2**|9+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Flatten|*in* input:**T**
*out* output:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Floor|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|Gather|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|GatherElements|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|GatherND|*in* data:**T**
*in* indices:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Gemm|*in* A:**T**
*in* B:**T**
*in* C:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|GlobalLpPool|*in* X:**T**
*out* Y:**T**|2+|**T** = tensor(float), tensor(float16)| -|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|Greater|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| -|GreaterOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|16+|**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|GroupNorm||21+|**M** = tensor(float), tensor(float16)
**T** = tensor(float), tensor(float16)| -|HardSigmoid|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| -|Hardmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|Identity|*in* input:**T**
*out* output:**T**

or

*in* input:**V**
*out* output:**V**|21+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||19+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||16+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||14+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|If|*in* cond:**B**
*out* outputs:**V**|19+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(float8e4m3fn)), seq(tensor(float8e4m3fnuz)), seq(tensor(float8e5m2)), seq(tensor(float8e5m2fnuz)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||16+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||7+|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|ImageScaler|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|6+|**T** = tensor(float), tensor(float16)| -|IsInf|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(float)
**T2** = tensor(bool)| -|||10+|**T1** = tensor(float)
**T2** = tensor(bool)| -|IsNaN|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| -|||13+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| -|||9+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| -|LRN|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|14+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|LayerNormalization|*in* X:**T**
*in* Scale:**T**
*in* B:**T**
*out* Y:**T**
*out* Mean:**U**
*out* InvStdDev:**U**

or

*in* X:**T**
*in* Scale:**V**
*in* B:**V**
*out* Y:**V**
*out* Mean:**U**
*out* InvStdDev:**U**|17+|**T** = tensor(float), tensor(float16)
**U** = tensor(float)| -|||1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| -|LeakyRelu|*in* X:**T**
*out* Y:**T**|16+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Less|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| -|LessOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| -|Log|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|LogSoftmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|LpPool|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||2+|**T** = tensor(float), tensor(float16)| -|MatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|MatMulInteger|*in* A:**T1**
*in* B:**T2**
*in* a_zero_point:**T1**
*in* b_zero_point:**T2**
*out* Y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int32)| -|Max|*in* data_0:**T**
*out* max:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||8+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|12+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| -|||11+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| -|||10+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| -|||8+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| -|||1+|**T** = tensor(float), tensor(float16)| -|MaxRoiPool|*in* X:**T**
*in* rois:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|11+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int64)| -|||9+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int64)| -|Mean|*in* data_0:**T**
*out* mean:**T**|13+|**T** = tensor(float), tensor(float16)| -|||8+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|MeanVarianceNormalization|*in* X:**T**
*out* Y:**T**

or

*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|MemcpyFromHost|*in* X:**T**
*out* Y:**T**|1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)| -|MemcpyToHost|*in* X:**T**
*out* Y:**T**|1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)| -|Min|*in* data_0:**T**
*out* min:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||8+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Mod|*in* A:**T**
*in* B:**T**
*out* C:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|||10+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Neg|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| -|||6+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| -|NonZero|*in* X:**T**
*out* Y:**tensor(int64)**|13+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|||9+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|Not|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(bool)| -|OneHot|*in* indices:**T1**
*in* depth:**T2**
*in* values:**T3**
*out* output:**T3**|11+|**T1** = tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T3** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**T1** = tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T3** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|OptionalGetElement|*in* input:**O**
*out* output:**V**|18+|**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8)), seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||15+|**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8))
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|OptionalHasElement|*in* input:**O**
*out* output:**B**|18+|**B** = tensor(bool)
**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8)), seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||15+|**B** = tensor(bool)
**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8))| -|Or|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| -|PRelu|*in* X:**T**
*in* slope:**T**
*out* Y:**T**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| -|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| -|||7+|**T** = tensor(float), tensor(float16)| -|Pad|*in* data:**T**
*in* pads:**tensor(int64)**
*in* constant_value:**T**
*in* axes:**Tind**
*out* output:**T**

or

*in* data:**T**
*in* pads:**tensor(int64)**
*in* constant_value:**T**
*out* output:**T**

or

*in* data:**T**
*out* output:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||2+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|ParametricSoftplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|Pow|*in* X:**T**
*in* Y:**T**
*out* Z:**T**

or

*in* X:**T**
*in* Y:**T1**
*out* Z:**T**|15+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|||7+|**T** = tensor(float), tensor(float16)| -|QLinearConv|*in* x:**T1**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T1**
*in* w:**T2**
*in* w_scale:**tensor(float)**
*in* w_zero_point:**T2**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T3**
*in* B:**T4**
*out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)
**T4** = tensor(int32)| -|QLinearMatMul|*in* a:**T1**
*in* a_scale:**TS**
*in* a_zero_point:**T1**
*in* b:**T2**
*in* b_scale:**TS**
*in* b_zero_point:**T2**
*in* y_scale:**TS**
*in* y_zero_point:**T3**
*out* y:**T3**

or

*in* a:**T1**
*in* a_scale:**tensor(float)**
*in* a_zero_point:**T1**
*in* b:**T2**
*in* b_scale:**tensor(float)**
*in* b_zero_point:**T2**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T3**
*out* y:**T3**|21+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)| -|||10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)| -|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**

or

*in* x:**T1**
*in* y_scale:**T2**
*in* y_zero_point:**T3**
*out* y:**T3**

or

*in* x:**T1**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T2**
*out* y:**T2**|21+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)| -|||19+|**T1** = tensor(float), tensor(float16), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| -|||13+|**T1** = tensor(float), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| -|||10+|**T1** = tensor(float), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| -|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|Range|*in* start:**T**
*in* limit:**T**
*in* delta:**T**
*out* output:**T**|11+|**T** = tensor(float), tensor(int16), tensor(int32), tensor(int64)| -|Reciprocal|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|ReduceL1|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|ReduceL2|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceLogSum|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceLogSumExp|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceMax|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceMean|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceMin|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|ReduceProd|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|ReduceSum|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|ReduceSumSquare|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| -|Relu|*in* X:**T**
*out* Y:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| -|||13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Reshape|*in* data:**T**
*in* shape:**tensor(int64)**
*out* reshaped:**T**

or

*in* data:**T**
*out* reshaped:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||14+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||5+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Resize|*in* X:**T**
*in* scales:**tensor(float)**
*out* Y:**T**

or

*in* X:**T1**
*in* roi:**T2**
*in* scales:**tensor(float)**
*in* sizes:**tensor(int64)**
*out* Y:**T1**|19+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||18+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||13+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||11+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|||10+|**T** = tensor(float), tensor(float16)| -|ReverseSequence|*in* input:**T**
*in* sequence_lens:**tensor(int64)**
*out* Y:**T**|10+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|RoiAlign|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*out* Y:**T1**|16+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| -|||10+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| -|Round|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| -|STFT|*in* signal:**T1**
*in* frame_step:**T2**
*in* window:**T1**
*in* frame_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| -|ScaledTanh|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|Scatter|*in* data:**T**
*in* indices:**Tind**
*in* updates:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||9+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|ScatterElements|*in* data:**T**
*in* indices:**Tind**
*in* updates:**T**
*out* output:**T**|16+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|ScatterND|*in* data:**T**
*in* indices:**tensor(int64)**
*in* updates:**T**
*out* output:**T**|16+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Selu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| -|SequenceAt|*in* input_sequence:**S**
*in* position:**I**
*out* tensor:**T**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|SequenceConstruct|*in* inputs:**T**
*out* output_sequence:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|SequenceEmpty|*out* output:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| -|SequenceErase|*in* input_sequence:**S**
*in* position:**I**
*out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| -|SequenceInsert|*in* input_sequence:**S**
*in* tensor:**T**
*in* position:**I**
*out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| -|SequenceLength|*in* input_sequence:**S**
*out* length:**I**|11+|**I** = tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| -|Shape|*in* data:**T**
*out* shape:**T1**|21+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||19+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||15+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||13+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|Shrink|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| -|Sigmoid|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Sign|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|SimplifiedLayerNormalization|*in* X:**T**
*in* scale:**V**
*out* Y:**V**
*out* inv_std_var:**U**|1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| -|Sin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Sinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| -|Size|*in* data:**T**
*out* size:**T1**|21+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||19+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||13+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|||1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| -|Slice|*in* data:**T**
*in* starts:**Tind**
*in* ends:**Tind**
*in* axes:**Tind**
*in* steps:**Tind**
*out* output:**T**

or

*in* data:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||10+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Softmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||11+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|Softplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|Softsign|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|SpaceToDepth|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Split|*in* input:**T**
*in* split:**T**
*out* outputs...:**T**

or

*in* input:**T**
*in* split:**tensor(int64)**
*out* outputs:**T**

or

*in* input:**T**
*out* outputs:**T**|18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||2+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Sqrt|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Squeeze|*in* data:**T**
*in* axes:**tensor(int64)**
*out* squeezed:**T**

or

*in* data:**T**
*out* squeezed:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Sum|*in* data_0:**T**
*out* sum:**T**|13+|**T** = tensor(float), tensor(float16)| -|||8+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|Tan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| -|Tanh|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| -|||6+|**T** = tensor(float), tensor(float16)| -|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float), tensor(float16)| -|||1+|**T** = tensor(float), tensor(float16)| -|Tile|*in* input:**T**
*in* repeats:**T1**
*out* output:**T**

or

*in* input:**T**
*in* tiles:**T**
*in* axis:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||6+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|TopK|*in* X:**T**
*in* K:**tensor(int64)**
*out* Values:**T**
*out* Indices:**I**

or

*in* X:**T**
*out* Values:**T**
*out* Indices:**I**|11+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||10+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Transpose|*in* data:**T**
*out* transposed:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Trilu|*in* input:**T**
*in* k:**tensor(int64)**
*out* output:**T**|14+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Unsqueeze|*in* data:**T**
*in* axes:**tensor(int64)**
*out* expanded:**T**

or

*in* data:**T**
*out* expanded:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Upsample|*in* X:**T**
*in* scales:**tensor(float)**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float), tensor(float16)| -|||9+|**T** = tensor(float), tensor(float16)| -|||7+|**T** = tensor(float), tensor(float16)| -|Where|*in* condition:**B**
*in* X:**T**
*in* Y:**T**
*out* output:**T**|16+|**B** = tensor(bool)
**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|||9+|**B** = tensor(bool)
**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Xor|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| -| | -| | -|**Operator Domain:** *com.microsoft*|||| -|Attention|*in* input:**T**
*in* weights:**T**
*in* bias:**T**
*in* mask_index:**M**
*in* past:**T**
*in* attention_bias:**T**
*in* past_sequence_length:**M**
*out* output:**T**
*out* present:**T**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| -|BiasAdd|*in* X:**T**
*in* bias:**T**
*in* skip:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|BiasGelu|*in* A:**T**
*in* B:**T**
*out* C:**T**|1+|**T** = tensor(float), tensor(float16)| -|BiasSplitGelu|*in* X:**T**
*in* bias:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|ConvTransposeWithDynamicPads|*in* X:**T**
*in* W:**T**
*in* Pads:**tensor(int64)**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|DequantizeLinear|*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**|1+|**T1** = tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| -|DynamicQuantizeMatMul|*in* A:**T1**
*in* B:**T2**
*in* b_scale:**T1**
*in* b_zero_point:**T2**
*in* bias:**T1**
*out* Y:**T1**|1+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| -|EmbedLayerNormalization|*in* input_ids:**T1**
*in* segment_ids:**T1**
*in* word_embedding:**T**
*in* position_embedding:**T**
*in* segment_embedding:**T**
*in* gamma:**T**
*in* beta:**T**
*in* mask:**T1**
*in* position_ids:**T1**
*out* output:**T**
*out* mask_index:**T1**
*out* embedding_sum:**T**|1+|**T** = tensor(float), tensor(float16)| -|FastGelu|*in* X:**T**
*in* bias:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|FusedMatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|FusedMatMulActivation|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|Gelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|GroupNorm|*in* X:**T**
*in* gamma:**M**
*in* beta:**M**
*out* Y:**T**|1+|**M** = tensor(float), tensor(float16)
**T** = tensor(float), tensor(float16)| -|GroupQueryAttention|*in* query:**T**
*in* key:**T**
*in* value:**T**
*in* past_key:**T_CACHE**
*in* past_value:**T_CACHE**
*in* seqlens_k:**M**
*in* total_sequence_length:**M**
*in* cos_cache:**T**
*in* sin_cache:**T**
*in* position_ids:**tensor(int64)**
*in* attention_bias:**T**
*in* head_sink:**T**
*in* k_scale:**T_KV_SCALE**
*in* v_scale:**T_KV_SCALE**
*out* output:**T**
*out* present_key:**T_CACHE**
*out* present_value:**T_CACHE**
*out* output_qk:**T**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| -|MatMulIntegerToFloat|*in* A:**T1**
*in* B:**T2**
*in* a_scale:**T3**
*in* b_scale:**T3**
*in* a_zero_point:**T1**
*in* b_zero_point:**T2**
*in* bias:**T3**
*out* Y:**T3**|1+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(float), tensor(float16)| -|MatMulNBits|*in* A:**T1**
*in* B:**T2**
*in* scales:**T1**
*in* zero_points:**T3**
*in* g_idx:**T4**
*in* bias:**T1**
*out* Y:**T1**|1+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(uint8)| -|MultiHeadAttention|*in* query:**T**
*in* key:**T**
*in* value:**T**
*in* bias:**T**
*in* key_padding_mask:**M**
*in* attention_bias:**T**
*in* past_key:**T**
*in* past_value:**T**
*in* past_sequence_length:**M**
*in* cache_indirection:**M**
*out* output:**T**
*out* present_key:**T**
*out* present_value:**T**
*out* qk:**QK**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| -|NhwcConv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|QAttention|*in* input:**T1**
*in* weight:**T2**
*in* bias:**T3**
*in* input_scale:**T3**
*in* weight_scale:**T3**
*in* mask_index:**T4**
*in* input_zero_point:**T1**
*in* weight_zero_point:**T2**
*in* past:**T3**
*out* output:**T3**
*out* present:**T3**|1+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(float), tensor(float16)
**T4** = tensor(int32)| -|QLinearAdd|*in* A:**T**
*in* A_scale:**tensor(float)**
*in* A_zero_point:**T**
*in* B:**T**
*in* B_scale:**tensor(float)**
*in* B_zero_point:**T**
*in* C_scale:**tensor(float)**
*in* C_zero_point:**T**
*out* C:**T**|1+|**T** = tensor(int8), tensor(uint8)| -|QLinearAveragePool|*in* X:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| -|QLinearConcat|*in* Y_scale:**TF**
*in* Y_zero_point:**T8**
*in* inputs:**TV**
*out* Y:**T8**|1+|**T8** = tensor(int8), tensor(uint8)
**TF** = tensor(float)
**TV** = tensor(float), tensor(int8), tensor(uint8)| -|QLinearGlobalAveragePool|*in* X:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| -|QLinearSigmoid|*in* X:**T**
*in* X_scale:**tensor(float)**
*in* X_zero_point:**T**
*in* Y_scale:**tensor(float)**
*in* Y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| -|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**|1+|**T1** = tensor(float), tensor(float16), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| -|QuickGelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|RotaryEmbedding|*in* input:**T**
*in* position_ids:**M**
*in* cos_cache:**T**
*in* sin_cache:**T**
*out* output:**T**|1+|**M** = tensor(int64)
**T** = tensor(float), tensor(float16)| -|SkipLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* beta:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| -|SkipSimplifiedLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| -| | -| | -|**Operator Domain:** *com.microsoft.dml*|||| -|DmlFusedAdd|*in* A:**T**
*in* B:**T**
*out* C:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedBatchNormalization|*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* mean:**T**
*in* var:**T**
*out* Y:**T**
*out* mean:**T**
*out* var:**T**
*out* saved_mean:**T**
*out* saved_var:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedConv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedGemm|*in* A:**T**
*in* B:**T**
*in* C:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedInstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedMatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedMeanVarianceNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| -|DmlFusedSum|*in* data_0:**T**
*out* sum:**T**|1+|**T** = tensor(float), tensor(float16)| -| | -| | From 0189eff60123fcd0229ff6754ffa5a6ced482a25 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 9 Mar 2026 22:26:45 +0000 Subject: [PATCH 04/18] Fix onnx version to 1.21.0rc1 and apply lintrunner formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change onnx==1.21.0 to onnx==1.21.0rc1 in all 7 requirements.txt files since the final 1.21.0 release is not yet published. Apply lintrunner auto-formatting fixes to whitespace/alignment. Verified SHA1 (deps.txt) and SHA512 (vcpkg portfile) hashes match the downloaded archives. No v1.21.0 tag exists yet — commit hash URL is correct. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../core/graph/contrib_ops/contrib_defs.h | 4 +- onnxruntime/core/graph/dml_ops/dml_defs.h | 4 +- .../core/providers/cpu/math/cumprod.cc | 12 ++--- .../core/providers/cpu/tensor/bitcast_op.cc | 44 +++++++++---------- .../test/opaque_api/test_opaque_api.cc | 2 +- .../providers/cpu/tensor/bitcast_op_test.cc | 2 +- onnxruntime/test/python/requirements.txt | 2 +- .../python/cpu/scripts/requirements.txt | 2 +- .../docker/scripts/lort/requirements.txt | 2 +- .../docker/scripts/manylinux/requirements.txt | 2 +- .../linux/docker/scripts/requirements.txt | 2 +- .../github/linux/python/requirements.txt | 2 +- .../github/windows/python/requirements.txt | 2 +- 13 files changed, 41 insertions(+), 41 deletions(-) diff --git a/onnxruntime/core/graph/contrib_ops/contrib_defs.h b/onnxruntime/core/graph/contrib_ops/contrib_defs.h index f88257b2baf08..ceb18386de9e4 100644 --- a/onnxruntime/core/graph/contrib_ops/contrib_defs.h +++ b/onnxruntime/core/graph/contrib_ops/contrib_defs.h @@ -35,7 +35,7 @@ inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) { ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define 
ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) \ +#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ(Counter, name) \ static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) @@ -43,7 +43,7 @@ inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) { ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func) #define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \ ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) -#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ +#define ONNX_CONTRIB_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ [[maybe_unused]] = schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) diff --git a/onnxruntime/core/graph/dml_ops/dml_defs.h b/onnxruntime/core/graph/dml_ops/dml_defs.h index 9551f72adfe17..ca97b655be3b3 100644 --- a/onnxruntime/core/graph/dml_ops/dml_defs.h +++ b/onnxruntime/core/graph/dml_ops/dml_defs.h @@ -11,7 +11,7 @@ namespace dml { MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) \ +#define MS_DML_OPERATOR_SCHEMA_UNIQ(Counter, name) \ static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) @@ -19,7 +19,7 @@ namespace dml { MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(__COUNTER__, name, schema_func) #define MS_DML_OPERATOR_SCHEMA_UNIQ_HELPER_ELSEWHERE(Counter, name, schema_func) \ 
MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) -#define MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ +#define MS_DML_OPERATOR_SCHEMA_UNIQ_ELSEWHERE(Counter, name, schema_func) \ static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ [[maybe_unused]] = schema_func(ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__)) diff --git a/onnxruntime/core/providers/cpu/math/cumprod.cc b/onnxruntime/core/providers/cpu/math/cumprod.cc index 6706c2d3ea8d0..a37bbe32a9c7a 100644 --- a/onnxruntime/core/providers/cpu/math/cumprod.cc +++ b/onnxruntime/core/providers/cpu/math/cumprod.cc @@ -44,7 +44,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); ONNX_CPU_OPERATOR_TYPED_KERNEL( @@ -54,7 +54,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); ONNX_CPU_OPERATOR_TYPED_KERNEL( @@ -64,7 +64,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); ONNX_CPU_OPERATOR_TYPED_KERNEL( @@ -74,7 +74,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); ONNX_CPU_OPERATOR_TYPED_KERNEL( @@ -84,7 +84,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", 
std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); ONNX_CPU_OPERATOR_TYPED_KERNEL( @@ -94,7 +94,7 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL( KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", std::vector{DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}), + DataTypeImpl::GetTensorType()}), CumProd); template diff --git a/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc index 9d9fa3e0c462b..e932438f6a914 100644 --- a/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc +++ b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc @@ -14,29 +14,29 @@ ONNX_CPU_OPERATOR_KERNEL( 26, KernelDefBuilder() .TypeConstraint("T1", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}) + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) .TypeConstraint("T2", {DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType(), - DataTypeImpl::GetTensorType()}) + 
DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) .MayInplace(0, 0), BitCast); diff --git a/onnxruntime/test/opaque_api/test_opaque_api.cc b/onnxruntime/test/opaque_api/test_opaque_api.cc index e39e76a912aec..e4479ce473939 100644 --- a/onnxruntime/test/opaque_api/test_opaque_api.cc +++ b/onnxruntime/test/opaque_api/test_opaque_api.cc @@ -118,7 +118,7 @@ ONNX_OPERATOR_KERNEL_EX( ONNX_TEST_OPERATOR_SCHEMA_UNIQ_HELPER(__COUNTER__, name) #define ONNX_TEST_OPERATOR_SCHEMA_UNIQ_HELPER(Counter, name) \ ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) -#define ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) \ +#define ONNX_TEST_OPERATOR_SCHEMA_UNIQ(Counter, name) \ static ONNX_NAMESPACE::OpSchemaRegistry::OpSchemaRegisterOnce op_schema_register_once##name##Counter \ [[maybe_unused]] = ONNX_NAMESPACE::OpSchema(#name, __FILE__, __LINE__) diff --git a/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc b/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc index d2c674b1081b2..65a2de1ee7f3f 100644 --- a/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/bitcast_op_test.cc @@ -124,7 +124,7 @@ TEST(BitCastTest, Float32ToInt32_2D) { TEST(BitCastTest, Float32ToInt32_3D) { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, - 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; std::vector expected(input.size()); std::memcpy(expected.data(), input.data(), input.size() * sizeof(float)); diff --git a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index 3ece2f39d4042..c3f671ebf7de9 100644 --- a/onnxruntime/test/python/requirements.txt +++ 
b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.21.0 +onnx==1.21.0rc1 pytest onnx-ir diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index b4c2f163e22ac..d4511e31964ba 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.21.0 +onnx==1.21.0rc1 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index eb52681341012..c33fd1d102a5c 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.21.0 +onnx==1.21.0rc1 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 9a0a6d0f51900..cdd375d49f0b6 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.21.0 +onnx==1.21.0rc1 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 3d886832e1ccb..8628e81251eac 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.21.0 
+onnx==1.21.0rc1 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index bfe9ab0d8a508..8b7af9d97a6b1 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 +12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.21.0 +onnx==1.21.0rc1 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index 2dfba37c6f381..110d3785d1369 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.21.0 +onnx==1.21.0rc1 From 86dbc8e60dea7a37c884f89e71f746929e7c7223 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 9 Mar 2026 22:29:48 +0000 Subject: [PATCH 05/18] Address PR review feedback: code fixes and restore DML docs - cumprod.cc: Add #include , validate axis tensor has exactly one element (0-D scalar or 1-D shape [1]) - bitcast_op.cc: Add null check for TensorTypeFromONNXEnum return value - OperatorKernels.md: Restore DML section that was accidentally removed during regeneration, add BitCast and CumProd entries manually Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/OperatorKernels.md | 444 +++++++++++++++++- .../core/providers/cpu/math/cumprod.cc | 6 +- .../core/providers/cpu/tensor/bitcast_op.cc | 2 + 3 files changed, 448 insertions(+), 4 deletions(-) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 10fc961865314..e172045020572 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -6,6 +6,7 @@ Do not modify directly.* - [CPUExecutionProvider](#cpuexecutionprovider) - [CUDAExecutionProvider](#cudaexecutionprovider) +- [DmlExecutionProvider](#dmlexecutionprovider) --------------- @@ -656,8 
+657,7 @@ Do not modify directly.* |ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(float16)| |||12|**T** = tensor(double), tensor(float), tensor(float16)| |||[1, 11]|**T** = tensor(double), tensor(float), tensor(float16)| -|Attention|*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*in* nonpad_kv_seqlen:**tensor(int64)**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**

or

*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**|24+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| -|||23|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| +|Attention|*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*in* nonpad_kv_seqlen:**tensor(int64)**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**

or

*in* Q:**T1**
*in* K:**T1**
*in* V:**T2**
*in* attn_mask:**U**
*in* past_key:**T1**
*in* past_value:**T2**
*out* Y:**T1**
*out* present_key:**T1**
*out* present_value:**T2**
*out* qk_matmul_output:**T1**|23+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)
**T2** = tensor(bfloat16), tensor(float), tensor(float16)
**U** = tensor(bfloat16), tensor(bool), tensor(float), tensor(float16)| |AveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16)| |||[19, 21]|**T** = tensor(double), tensor(float), tensor(float16)| |||[11, 18]|**T** = tensor(double), tensor(float), tensor(float16)| @@ -1079,3 +1079,443 @@ Do not modify directly.* |||[1, 12]|**T** = tensor(double), tensor(float), tensor(float16)| | | | | + + +
+ +## Operators implemented by DmlExecutionProvider + +| Op Name | Parameters | OpSet Version | Types Supported | +|---------|------------|---------------|-----------------| +|**Operator Domain:** *ai.onnx*|||| +|Abs|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| +|||6+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| +|Acos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Acosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| +|Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Affine|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|And|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| +|ArgMax|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Asin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Asinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| +|Atan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Atanh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| +|AveragePool|*in* X:**T**
*out* Y:**T**|19+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||10+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|BatchNormalization|*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* input_mean:**U**
*in* input_var:**U**
*out* Y:**T**
*out* running_mean:**U**
*out* running_var:**U**

or

*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* mean:**T**
*in* var:**T**
*out* Y:**T**
*out* mean:**T**
*out* var:**T**
*out* saved_mean:**T**
*out* saved_var:**T**

or

*in* X:**T**
*in* scale:**T1**
*in* B:**T1**
*in* input_mean:**T2**
*in* input_var:**T2**
*out* Y:**T**
*out* running_mean:**T2**
*out* running_var:**T2**|15+|**T** = tensor(float), tensor(float16)| +|||14+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|BitShift|*in* X:**T**
*in* Y:**T**
*out* Z:**T**|11+|**T** = tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|BitwiseAnd|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|BitwiseNot|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|BitwiseOr|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|BitwiseXor|*in* A:**T**
*in* B:**T**
*out* C:**T**|18+|**T** = tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Cast|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||19+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||6+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|CastLike|*in* input:**T1**
*in* target_type:**T2**
*out* output:**T2**|21+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||19+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||15+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Ceil|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Celu|*in* X:**T**
*out* Y:**T**|12+|**T** = tensor(float), tensor(float16)| +|Clip|*in* input:**T**
*in* min:**T**
*in* max:**T**
*out* output:**T**

or

*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Col2Im|*in* input:**T**
*in* image_shape:**tensor(int64)**
*in* block_shape:**tensor(int64)**
*out* output:**T**|18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Concat|*in* inputs:**T**
*out* concat_result:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||4+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|ConcatFromSequence|*in* input_sequence:**S**
*out* concat_result:**T**|11+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|ConstantOfShape|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(int64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**T1** = tensor(int64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ConvInteger|*in* x:**T1**
*in* w:**T2**
*in* x_zero_point:**T1**
*in* w_zero_point:**T2**
*out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int32)| +|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|Cos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Cosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| +|Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|DFT|*in* input:**T1**
*in* dft_length:**T2**
*in* axis:**tensor(int64)**
*out* output:**T1**

or

*in* input:**T1**
*in* dft_length:**T2**
*out* output:**T1**|20+|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| +|||17+|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| +|DepthToSpace|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|DequantizeLinear|*in* x:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*out* y:**tensor(float)**

or

*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**

or

*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T3**|21+|**T1** = tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||19+|**T1** = tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||13+|**T** = tensor(int32), tensor(int8), tensor(uint8)| +|||10+|**T** = tensor(int32), tensor(int8), tensor(uint8)| +|Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|7+|**T** = tensor(float), tensor(float16)| +|DynamicQuantizeLinear|*in* x:**T1**
*out* y:**T2**
*out* y_scale:**tensor(float)**
*out* y_zero_point:**T2**|11+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| +|Einsum|*in* Inputs:**T**
*out* Output:**T**|12+|**T** = tensor(float), tensor(float16)| +|Elu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| +|Equal|*in* A:**T**
*in* B:**T**
*out* C:**T1**|19+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| +|Erf|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|Exp|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Expand|*in* input:**T**
*in* shape:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||8+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|EyeLike|*in* input:**T1**
*out* output:**T2**|9+|**T1** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Flatten|*in* input:**T**
*out* output:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Floor|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|Gather|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|GatherElements|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|GatherND|*in* data:**T**
*in* indices:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Gemm|*in* A:**T**
*in* B:**T**
*in* C:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|GlobalLpPool|*in* X:**T**
*out* Y:**T**|2+|**T** = tensor(float), tensor(float16)| +|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|Greater|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| +|GreaterOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|16+|**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|GroupNorm||21+|**M** = tensor(float), tensor(float16)
**T** = tensor(float), tensor(float16)| +|HardSigmoid|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| +|Hardmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|Identity|*in* input:**T**
*out* output:**T**

or

*in* input:**V**
*out* output:**V**|21+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||19+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||16+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||14+|**V** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|If|*in* cond:**B**
*out* outputs:**V**|19+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(float8e4m3fn)), seq(tensor(float8e4m3fnuz)), seq(tensor(float8e5m2)), seq(tensor(float8e5m2fnuz)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||16+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**B** = tensor(bool)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||7+|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|ImageScaler|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|6+|**T** = tensor(float), tensor(float16)| +|IsInf|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(float)
**T2** = tensor(bool)| +|||10+|**T1** = tensor(float)
**T2** = tensor(bool)| +|IsNaN|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| +|||13+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| +|||9+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(bool)| +|LRN|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|14+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|LayerNormalization|*in* X:**T**
*in* Scale:**T**
*in* B:**T**
*out* Y:**T**
*out* Mean:**U**
*out* InvStdDev:**U**

or

*in* X:**T**
*in* Scale:**V**
*in* B:**V**
*out* Y:**V**
*out* Mean:**U**
*out* InvStdDev:**U**|17+|**T** = tensor(float), tensor(float16)
**U** = tensor(float)| +|||1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| +|LeakyRelu|*in* X:**T**
*out* Y:**T**|16+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Less|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||7+|**T** = tensor(float), tensor(float16)
**T1** = tensor(bool)| +|LessOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| +|Log|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|LogSoftmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|LpPool|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||2+|**T** = tensor(float), tensor(float16)| +|MatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|MatMulInteger|*in* A:**T1**
*in* B:**T2**
*in* a_zero_point:**T1**
*in* b_zero_point:**T2**
*out* Y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int32)| +|Max|*in* data_0:**T**
*out* max:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||8+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|12+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| +|||11+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| +|||10+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| +|||8+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)| +|||1+|**T** = tensor(float), tensor(float16)| +|MaxRoiPool|*in* X:**T**
*in* rois:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|11+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int64)| +|||9+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int64)| +|Mean|*in* data_0:**T**
*out* mean:**T**|13+|**T** = tensor(float), tensor(float16)| +|||8+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|MeanVarianceNormalization|*in* X:**T**
*out* Y:**T**

or

*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|MemcpyFromHost|*in* X:**T**
*out* Y:**T**|1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)| +|MemcpyToHost|*in* X:**T**
*out* Y:**T**|1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)| +|Min|*in* data_0:**T**
*out* min:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||8+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Mod|*in* A:**T**
*in* B:**T**
*out* C:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|||10+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|Mul|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Neg|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| +|||6+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)| +|NonZero|*in* X:**T**
*out* Y:**tensor(int64)**|13+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|||9+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|Not|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(bool)| +|OneHot|*in* indices:**T1**
*in* depth:**T2**
*in* values:**T3**
*out* output:**T3**|11+|**T1** = tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T3** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**T1** = tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)
**T2** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T3** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|OptionalGetElement|*in* input:**O**
*out* output:**V**|18+|**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8)), seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||15+|**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8))
**V** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|OptionalHasElement|*in* input:**O**
*out* output:**B**|18+|**B** = tensor(bool)
**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8)), seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||15+|**B** = tensor(bool)
**O** = optional(seq(tensor(bfloat16))), optional(seq(tensor(bool))), optional(seq(tensor(double))), optional(seq(tensor(float))), optional(seq(tensor(float16))), optional(seq(tensor(int16))), optional(seq(tensor(int32))), optional(seq(tensor(int64))), optional(seq(tensor(int8))), optional(seq(tensor(string))), optional(seq(tensor(uint16))), optional(seq(tensor(uint32))), optional(seq(tensor(uint64))), optional(seq(tensor(uint8))), optional(tensor(bfloat16)), optional(tensor(bool)), optional(tensor(double)), optional(tensor(float)), optional(tensor(float16)), optional(tensor(int16)), optional(tensor(int32)), optional(tensor(int64)), optional(tensor(int8)), optional(tensor(string)), optional(tensor(uint16)), optional(tensor(uint32)), optional(tensor(uint64)), optional(tensor(uint8))| +|Or|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| +|PRelu|*in* X:**T**
*in* slope:**T**
*out* Y:**T**|16+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| +|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| +|||7+|**T** = tensor(float), tensor(float16)| +|Pad|*in* data:**T**
*in* pads:**tensor(int64)**
*in* constant_value:**T**
*in* axes:**Tind**
*out* output:**T**

or

*in* data:**T**
*in* pads:**tensor(int64)**
*in* constant_value:**T**
*out* output:**T**

or

*in* data:**T**
*out* output:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||2+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|ParametricSoftplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|Pow|*in* X:**T**
*in* Y:**T**
*out* Z:**T**

or

*in* X:**T**
*in* Y:**T1**
*out* Z:**T**|15+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int32)
**T1** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|||7+|**T** = tensor(float), tensor(float16)| +|QLinearConv|*in* x:**T1**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T1**
*in* w:**T2**
*in* w_scale:**tensor(float)**
*in* w_zero_point:**T2**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T3**
*in* B:**T4**
*out* y:**T3**|10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)
**T4** = tensor(int32)| +|QLinearMatMul|*in* a:**T1**
*in* a_scale:**TS**
*in* a_zero_point:**T1**
*in* b:**T2**
*in* b_scale:**TS**
*in* b_zero_point:**T2**
*in* y_scale:**TS**
*in* y_zero_point:**T3**
*out* y:**T3**

or

*in* a:**T1**
*in* a_scale:**tensor(float)**
*in* a_zero_point:**T1**
*in* b:**T2**
*in* b_scale:**tensor(float)**
*in* b_zero_point:**T2**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T3**
*out* y:**T3**|21+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)| +|||10+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(int8), tensor(uint8)| +|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**

or

*in* x:**T1**
*in* y_scale:**T2**
*in* y_zero_point:**T3**
*out* y:**T3**

or

*in* x:**T1**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T2**
*out* y:**T2**|21+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)| +|||19+|**T1** = tensor(float), tensor(float16), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| +|||13+|**T1** = tensor(float), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| +|||10+|**T1** = tensor(float), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| +|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|Range|*in* start:**T**
*in* limit:**T**
*in* delta:**T**
*out* output:**T**|11+|**T** = tensor(float), tensor(int16), tensor(int32), tensor(int64)| +|Reciprocal|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|ReduceL1|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|ReduceL2|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceLogSum|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceLogSumExp|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceMax|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceMean|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceMin|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|20+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||12+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|ReduceProd|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|ReduceSum|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|ReduceSumSquare|*in* data:**T**
*in* axes:**tensor(int64)**
*out* reduced:**T**

or

*in* data:**T**
*out* reduced:**T**|18+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||11+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|||1+|**T** = tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| +|Relu|*in* X:**T**
*out* Y:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8)| +|||13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Reshape|*in* data:**T**
*in* shape:**tensor(int64)**
*out* reshaped:**T**

or

*in* data:**T**
*out* reshaped:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||14+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||5+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Resize|*in* X:**T**
*in* scales:**tensor(float)**
*out* Y:**T**

or

*in* X:**T1**
*in* roi:**T2**
*in* scales:**tensor(float)**
*in* sizes:**tensor(int64)**
*out* Y:**T1**|19+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||18+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||13+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||11+|**T1** = tensor(float), tensor(float16), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|||10+|**T** = tensor(float), tensor(float16)| +|ReverseSequence|*in* input:**T**
*in* sequence_lens:**tensor(int64)**
*out* Y:**T**|10+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|RoiAlign|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*out* Y:**T1**|16+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| +|||10+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| +|Round|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(float), tensor(float16)| +|STFT|*in* signal:**T1**
*in* frame_step:**T2**
*in* window:**T1**
*in* frame_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(int32), tensor(int64)| +|ScaledTanh|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|Scatter|*in* data:**T**
*in* indices:**Tind**
*in* updates:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||9+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|ScatterElements|*in* data:**T**
*in* indices:**Tind**
*in* updates:**T**
*out* output:**T**|16+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|ScatterND|*in* data:**T**
*in* indices:**tensor(int64)**
*in* updates:**T**
*out* output:**T**|16+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Selu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float), tensor(float16)| +|SequenceAt|*in* input_sequence:**S**
*in* position:**I**
*out* tensor:**T**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|SequenceConstruct|*in* inputs:**T**
*out* output_sequence:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|SequenceEmpty|*out* output:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| +|SequenceErase|*in* input_sequence:**S**
*in* position:**I**
*out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| +|SequenceInsert|*in* input_sequence:**S**
*in* tensor:**T**
*in* position:**I**
*out* output_sequence:**S**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| +|SequenceLength|*in* input_sequence:**S**
*out* length:**I**|11+|**I** = tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| +|Shape|*in* data:**T**
*out* shape:**T1**|21+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||19+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||15+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||13+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|Shrink|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint8)| +|Sigmoid|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Sign|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|SimplifiedLayerNormalization|*in* X:**T**
*in* scale:**V**
*out* Y:**V**
*out* inv_std_var:**U**|1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| +|Sin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Sinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| +|Size|*in* data:**T**
*out* size:**T1**|21+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||19+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||13+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|||1+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int4), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint4), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| +|Slice|*in* data:**T**
*in* starts:**Tind**
*in* ends:**Tind**
*in* axes:**Tind**
*in* steps:**Tind**
*out* output:**T**

or

*in* data:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||10+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Softmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||11+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|Softplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|Softsign|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|SpaceToDepth|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Split|*in* input:**T**
*in* split:**T**
*out* outputs...:**T**

or

*in* input:**T**
*in* split:**tensor(int64)**
*out* outputs:**T**

or

*in* input:**T**
*out* outputs:**T**|18+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||2+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Sqrt|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Squeeze|*in* data:**T**
*in* axes:**tensor(int64)**
*out* squeezed:**T**

or

*in* data:**T**
*out* squeezed:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Sub|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||7+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Sum|*in* data_0:**T**
*out* sum:**T**|13+|**T** = tensor(float), tensor(float16)| +|||8+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|Tan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| +|Tanh|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16)| +|||6+|**T** = tensor(float), tensor(float16)| +|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)| +|Tile|*in* input:**T**
*in* repeats:**T1**
*out* output:**T**

or

*in* input:**T**
*in* tiles:**T**
*in* axis:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||6+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|TopK|*in* X:**T**
*in* K:**tensor(int64)**
*out* Values:**T**
*out* Indices:**I**

or

*in* X:**T**
*out* Values:**T**
*out* Indices:**I**|11+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||10+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**I** = tensor(int64)
**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Transpose|*in* data:**T**
*out* transposed:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Trilu|*in* input:**T**
*in* k:**tensor(int64)**
*out* output:**T**|14+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Unsqueeze|*in* data:**T**
*in* axes:**tensor(int64)**
*out* expanded:**T**

or

*in* data:**T**
*out* expanded:**T**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||11+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||1+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Upsample|*in* X:**T**
*in* scales:**tensor(float)**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float), tensor(float16)| +|||9+|**T** = tensor(float), tensor(float16)| +|||7+|**T** = tensor(float), tensor(float16)| +|Where|*in* condition:**B**
*in* X:**T**
*in* Y:**T**
*out* output:**T**|16+|**B** = tensor(bool)
**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|||9+|**B** = tensor(bool)
**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|Xor|*in* A:**T**
*in* B:**T**
*out* C:**T1**|7+|**T** = tensor(bool)| +| | +| | +|**Operator Domain:** *com.microsoft*|||| +|Attention|*in* input:**T**
*in* weights:**T**
*in* bias:**T**
*in* mask_index:**M**
*in* past:**T**
*in* attention_bias:**T**
*in* past_sequence_length:**M**
*out* output:**T**
*out* present:**T**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| +|BiasAdd|*in* X:**T**
*in* bias:**T**
*in* skip:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|BiasGelu|*in* A:**T**
*in* B:**T**
*out* C:**T**|1+|**T** = tensor(float), tensor(float16)| +|BiasSplitGelu|*in* X:**T**
*in* bias:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|ConvTransposeWithDynamicPads|*in* X:**T**
*in* W:**T**
*in* Pads:**tensor(int64)**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|DequantizeLinear|*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**|1+|**T1** = tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| +|DynamicQuantizeMatMul|*in* A:**T1**
*in* B:**T2**
*in* b_scale:**T1**
*in* b_zero_point:**T2**
*in* bias:**T1**
*out* Y:**T1**|1+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| +|EmbedLayerNormalization|*in* input_ids:**T1**
*in* segment_ids:**T1**
*in* word_embedding:**T**
*in* position_embedding:**T**
*in* segment_embedding:**T**
*in* gamma:**T**
*in* beta:**T**
*in* mask:**T1**
*in* position_ids:**T1**
*out* output:**T**
*out* mask_index:**T1**
*out* embedding_sum:**T**|1+|**T** = tensor(float), tensor(float16)| +|FastGelu|*in* X:**T**
*in* bias:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|FusedMatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|FusedMatMulActivation|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|Gelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|GroupNorm|*in* X:**T**
*in* gamma:**M**
*in* beta:**M**
*out* Y:**T**|1+|**M** = tensor(float), tensor(float16)
**T** = tensor(float), tensor(float16)| +|GroupQueryAttention|*in* query:**T**
*in* key:**T**
*in* value:**T**
*in* past_key:**T_CACHE**
*in* past_value:**T_CACHE**
*in* seqlens_k:**M**
*in* total_sequence_length:**M**
*in* cos_cache:**T**
*in* sin_cache:**T**
*in* position_ids:**tensor(int64)**
*in* attention_bias:**T**
*in* head_sink:**T**
*in* k_scale:**T_KV_SCALE**
*in* v_scale:**T_KV_SCALE**
*out* output:**T**
*out* present_key:**T_CACHE**
*out* present_value:**T_CACHE**
*out* output_qk:**T**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| +|MatMulIntegerToFloat|*in* A:**T1**
*in* B:**T2**
*in* a_scale:**T3**
*in* b_scale:**T3**
*in* a_zero_point:**T1**
*in* b_zero_point:**T2**
*in* bias:**T3**
*out* Y:**T3**|1+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(float), tensor(float16)| +|MatMulNBits|*in* A:**T1**
*in* B:**T2**
*in* scales:**T1**
*in* zero_points:**T3**
*in* g_idx:**T4**
*in* bias:**T1**
*out* Y:**T1**|1+|**T1** = tensor(float), tensor(float16)
**T2** = tensor(uint8)| +|MultiHeadAttention|*in* query:**T**
*in* key:**T**
*in* value:**T**
*in* bias:**T**
*in* key_padding_mask:**M**
*in* attention_bias:**T**
*in* past_key:**T**
*in* past_value:**T**
*in* past_sequence_length:**M**
*in* cache_indirection:**M**
*out* output:**T**
*out* present_key:**T**
*out* present_value:**T**
*out* qk:**QK**|1+|**M** = tensor(int32)
**T** = tensor(float), tensor(float16)| +|NhwcConv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|QAttention|*in* input:**T1**
*in* weight:**T2**
*in* bias:**T3**
*in* input_scale:**T3**
*in* weight_scale:**T3**
*in* mask_index:**T4**
*in* input_zero_point:**T1**
*in* weight_zero_point:**T2**
*in* past:**T3**
*out* output:**T3**
*out* present:**T3**|1+|**T1** = tensor(int8), tensor(uint8)
**T2** = tensor(int8), tensor(uint8)
**T3** = tensor(float), tensor(float16)
**T4** = tensor(int32)| +|QLinearAdd|*in* A:**T**
*in* A_scale:**tensor(float)**
*in* A_zero_point:**T**
*in* B:**T**
*in* B_scale:**tensor(float)**
*in* B_zero_point:**T**
*in* C_scale:**tensor(float)**
*in* C_zero_point:**T**
*out* C:**T**|1+|**T** = tensor(int8), tensor(uint8)| +|QLinearAveragePool|*in* X:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| +|QLinearConcat|*in* Y_scale:**TF**
*in* Y_zero_point:**T8**
*in* inputs:**TV**
*out* Y:**T8**|1+|**T8** = tensor(int8), tensor(uint8)
**TF** = tensor(float)
**TV** = tensor(float), tensor(int8), tensor(uint8)| +|QLinearGlobalAveragePool|*in* X:**T**
*in* x_scale:**tensor(float)**
*in* x_zero_point:**T**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| +|QLinearSigmoid|*in* X:**T**
*in* X_scale:**tensor(float)**
*in* X_zero_point:**T**
*in* Y_scale:**tensor(float)**
*in* Y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| +|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**|1+|**T1** = tensor(float), tensor(float16), tensor(int32)
**T2** = tensor(int8), tensor(uint8)| +|QuickGelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|RotaryEmbedding|*in* input:**T**
*in* position_ids:**M**
*in* cos_cache:**T**
*in* sin_cache:**T**
*out* output:**T**|1+|**M** = tensor(int64)
**T** = tensor(float), tensor(float16)| +|SkipLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* beta:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| +|SkipSimplifiedLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| +| | +| | +|**Operator Domain:** *com.microsoft.dml*|||| +|DmlFusedAdd|*in* A:**T**
*in* B:**T**
*out* C:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedBatchNormalization|*in* X:**T**
*in* scale:**T**
*in* B:**T**
*in* mean:**T**
*in* var:**T**
*out* Y:**T**
*out* mean:**T**
*out* var:**T**
*out* saved_mean:**T**
*out* saved_var:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedConv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedGemm|*in* A:**T**
*in* B:**T**
*in* C:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedInstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedMatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedMeanVarianceNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float), tensor(float16)| +|DmlFusedSum|*in* data_0:**T**
*out* sum:**T**|1+|**T** = tensor(float), tensor(float16)| +| | +| | diff --git a/onnxruntime/core/providers/cpu/math/cumprod.cc b/onnxruntime/core/providers/cpu/math/cumprod.cc index a37bbe32a9c7a..2a7e63a68e9b3 100644 --- a/onnxruntime/core/providers/cpu/math/cumprod.cc +++ b/onnxruntime/core/providers/cpu/math/cumprod.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include +#include #include "cumprod.h" #include "core/providers/common.h" @@ -18,8 +19,9 @@ Status GetAxis(const Tensor* axis_tensor, int64_t input_rank, int64_t& axis_out) if (!axis_tensor) return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Axis tensor must be provided to the CumProd op"); - if (axis_tensor->Shape().NumDimensions() > 1) - return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Axis tensor should be 0D or 1D"); + if (axis_tensor->Shape().NumDimensions() > 1 || axis_tensor->Shape().Size() != 1) + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, + "Axis tensor must be a scalar (0-D) or 1-D tensor with exactly one element"); if (axis_tensor->IsDataType()) { axis_out = static_cast(axis_tensor->Data()[0]); diff --git a/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc index e932438f6a914..e447f66e07635 100644 --- a/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc +++ b/onnxruntime/core/providers/cpu/tensor/bitcast_op.cc @@ -54,6 +54,8 @@ Status BitCast::Compute(OpKernelContext* context) const { const size_t input_element_size = input->DataType()->Size(); const auto* output_type = DataTypeImpl::TensorTypeFromONNXEnum(to_); + ORT_RETURN_IF_NOT(output_type != nullptr, + "BitCast: unsupported target type (ONNX enum value: ", to_, ")."); const size_t output_element_size = output_type->GetElementType()->Size(); ORT_RETURN_IF_NOT(input_element_size == output_element_size, From 8ff65796b9c4de7e5eebf44a226e4db856f9a69e Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Wed, 11 Mar 2026 17:17:50 +0000 Subject: [PATCH 06/18] Fix 
MakeInitializer shape/data mismatch in ReshapeFusion test ONNX 1.21.0 (onnx/onnx#7675) added stricter raw_data size validation in ParseData. The test had shape {4} but only 3 values {2, 64, 32}, which old ONNX silently ignored. Fix shape to {3}. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- onnxruntime/test/optimizer/graph_transform_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index d7780da36626c..356ac1be7d34e 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -4355,7 +4355,7 @@ TEST_F(GraphTransformationTests, ReshapeFusion_Contiguous_Reshape) { auto build_test_case = [&](ModelTestBuilder& builder) { auto* input_arg = builder.MakeInput({{8, 16, 32}}); auto* shape_initializer_1 = builder.MakeInitializer({4}, {2, 4, 16, 32}); - auto* shape_initializer_2 = builder.MakeInitializer({4}, {2, 64, 32}); + auto* shape_initializer_2 = builder.MakeInitializer({3}, {2, 64, 32}); auto* axes_initializer = builder.MakeInitializer({1}, {1}); auto* reshape_out_1 = builder.MakeIntermediate(); auto* reshape_out_2 = builder.MakeIntermediate(); From 29744dae8b95f0f91911a0f4167f2de92942823b Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Wed, 11 Mar 2026 17:38:18 +0000 Subject: [PATCH 07/18] webgl-operators.md update --- js/web/docs/webgl-operators.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/js/web/docs/webgl-operators.md b/js/web/docs/webgl-operators.md index 8e55d8c4e2564..98a79de61727c 100644 --- a/js/web/docs/webgl-operators.md +++ b/js/web/docs/webgl-operators.md @@ -24,6 +24,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat | [AveragePool](https://github.com/onnx/onnx/blob/main/docs/Operators.md#AveragePool) | [7-9](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-7), 
[10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-10), [11-18](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-11), [19-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-19), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#AveragePool-22) | | [BatchNormalization](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BatchNormalization) | [7-8](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-7), [9-13](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-9), [14](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-14), [15+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#BatchNormalization-15) | | [Bernoulli](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Bernoulli) | | +| [BitCast](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BitCast) | | | [BitShift](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BitShift) | | | [BitwiseAnd](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BitwiseAnd) | | | [BitwiseNot](https://github.com/onnx/onnx/blob/main/docs/Operators.md#BitwiseNot) | | @@ -47,6 +48,7 @@ See [Compatibility](../README.md#Compatibility) for a list of the supported plat | [ConvTranspose](https://github.com/onnx/onnx/blob/main/docs/Operators.md#ConvTranspose) | [1-10](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-1), [11-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-11), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#ConvTranspose-22) | | [Cos](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Cos) | [7-21](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Cos-7), [22+](https://github.com/onnx/onnx/blob/main/docs/Changelog.md#Cos-22) | | [Cosh](https://github.com/onnx/onnx/blob/main/docs/Operators.md#Cosh) | | +| 
[CumProd](https://github.com/onnx/onnx/blob/main/docs/Operators.md#CumProd) | | | [CumSum](https://github.com/onnx/onnx/blob/main/docs/Operators.md#CumSum) | | | [DFT](https://github.com/onnx/onnx/blob/main/docs/Operators.md#DFT) | | | [DeformConv](https://github.com/onnx/onnx/blob/main/docs/Operators.md#DeformConv) | | From a1b929c67a1e233e52bb73b403a8f41c446ed3c9 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 12 Mar 2026 19:56:27 +0000 Subject: [PATCH 08/18] Update ONNX to 1.21.0rc2 and address PR review comments - Update ONNX submodule, deps.txt, vcpkg portfile to rc2 commit a51ac075 - Update onnx==1.21.0rc2 in all 7 requirements.txt files - Fix cumprod.cc review comments (namespace, ORT_ENFORCE, type, closing brace) - Add 5 test exclusions: 4 DFT rfft/irfft tests (ORT lacks IRFFT) + 1 BitCast bool test Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/deps.txt | 2 +- cmake/external/onnx | 2 +- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- .../core/providers/cpu/math/cumprod.cc | 20 ++++++------------- onnxruntime/test/python/requirements.txt | 2 +- .../onnx_backend_test_series_filters.jsonc | 11 +++++++++- .../python/cpu/scripts/requirements.txt | 2 +- .../docker/scripts/lort/requirements.txt | 2 +- .../docker/scripts/manylinux/requirements.txt | 2 +- .../linux/docker/scripts/requirements.txt | 2 +- .../github/linux/python/requirements.txt | 2 +- .../github/windows/python/requirements.txt | 2 +- 12 files changed, 27 insertions(+), 26 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 2d7196646434f..9ee8dc85a4ffc 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 -onnx;https://github.com/onnx/onnx/archive/fbbe45b8e25b5b0018cc038caaf906d3b09634ee.zip;c38208d94ec0dd799a8468ac72f6058f74d44830 +onnx;https://github.com/onnx/onnx/archive/a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5.zip;9f250d23582b974a1bd5119ada7306a298bc6411 # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/external/onnx b/cmake/external/onnx index fbbe45b8e25b5..a51ac0754e0f6 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit fbbe45b8e25b5b0018cc038caaf906d3b09634ee +Subproject commit a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5 diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index ce95fa0e4535e..b411799a647a4 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF fbbe45b8e25b5b0018cc038caaf906d3b09634ee - SHA512 971a71b6d0fdb96270f82851c6a5940cc1c34d224247b678033ba179ffd8cc7bfecf59b235d013a0b94d089bd7d6fe46d01b2d6f5056bdb9fdff98fba0cc4e27 + REF a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5 + SHA512 e29c75cf22fea46f659f03cff470e0aecc22fbe3ee4baf082208371a192c6fa58c2080391d4959b95db12b239eb329e215020c85a40be756a2a44fa5b375cb6f PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch diff --git a/onnxruntime/core/providers/cpu/math/cumprod.cc b/onnxruntime/core/providers/cpu/math/cumprod.cc index 2a7e63a68e9b3..3c0646b3d6b9a 100644 --- a/onnxruntime/core/providers/cpu/math/cumprod.cc +++ b/onnxruntime/core/providers/cpu/math/cumprod.cc @@ -10,8 +10,6 
@@ #include "core/framework/op_kernel.h" #include "core/framework/tensorprotoutils.h" -using namespace onnxruntime; - namespace onnxruntime { namespace cumprod_op { @@ -104,27 +102,21 @@ CumProd::CumProd(const OpKernelInfo& info) : OpKernel(info), exclusive_(), re int64_t exclusive = 0; auto status = info.GetAttr("exclusive", &exclusive); if (status.IsOK()) { - if (exclusive == 1 || exclusive == 0) { - exclusive_ = exclusive; - } else { - ORT_ENFORCE(false, "attribute exclusive can only be 0 or 1"); - } + ORT_ENFORCE(exclusive == 0 || exclusive == 1, "exclusive attribute must be 0 or 1, got: ", exclusive); + exclusive_ = exclusive; } int64_t reverse = 0; status = info.GetAttr("reverse", &reverse); if (status.IsOK()) { - if (reverse == 1 || reverse == 0) { - reverse_ = reverse; - } else { - ORT_ENFORCE(false, "attribute reverse can only be 0 or 1"); - } + ORT_ENFORCE(reverse == 0 || reverse == 1, "reverse attribute must be 0 or 1, got: ", reverse); + reverse_ = reverse; } } template Status CumProd::Compute(OpKernelContext* ctx) const { const Tensor* input = ctx->Input(0); - size_t rank = input->Shape().NumDimensions(); + int64_t rank = static_cast(input->Shape().NumDimensions()); if (rank == 0) return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Cannot apply CumProd operator on a scalar"); @@ -221,4 +213,4 @@ Status CumProd::Compute(OpKernelContext* ctx) const { return Status::OK(); } -}; // namespace onnxruntime +} // namespace onnxruntime diff --git a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index c3f671ebf7de9..58a0214d4f0c8 100644 --- a/onnxruntime/test/python/requirements.txt +++ b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.21.0rc1 +onnx==1.21.0rc2 pytest onnx-ir diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index b40d0bc67d3ed..262d5dfe797e6 100644 --- 
a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -379,7 +379,16 @@ "^test_quantizelinear_int4", "^test_quantizelinear_uint4", // topk uint64 is not implemented in ORT yet. - "^test_top_k_uint64" + "^test_top_k_uint64", + // ORT DFT kernel does not implement IRFFT (inverse real FFT) — it always outputs + // complex (last dim=2) which is wrong for IRFFT. These ONNX backend tests were + // added in onnx commit ee910d0e4 for DFT-20 spec clarification. + "^test_dft_rfft", + "^test_dft_irfft", + "^test_dft_rfft_opset19", + "^test_dft_irfft_opset19", + // ORT BitCast kernel does not support bool type. + "^test_bitcast_bool_to_uint8" ], "current_failing_tests_x86": [ "^test_vgg19", diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index d4511e31964ba..97a98c32f7aaf 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.21.0rc1 +onnx==1.21.0rc2 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index c33fd1d102a5c..5428b0253a9d1 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.21.0rc1 +onnx==1.21.0rc2 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index cdd375d49f0b6..ea9fe01438082 100644 
--- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.21.0rc1 +onnx==1.21.0rc2 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 8628e81251eac..392956c4d0b21 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.21.0rc1 +onnx==1.21.0rc2 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 8b7af9d97a6b1..9f9028ea37459 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 +12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.21.0rc1 +onnx==1.21.0rc2 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index 110d3785d1369..5f1e3b9b19c0b 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.21.0rc1 +onnx==1.21.0rc2 From 54948d8c4a74f1436b9faee645b1a7b56b26c4da Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 12 Mar 2026 20:29:46 +0000 Subject: [PATCH 09/18] Add test_bitcast_bool_to_uint8 and DFT rfft/irfft to C++ test runner broken tests The JSONC filter only covers Python backend tests. The C++ onnx_test_runner uses hardcoded arrays in TestCase.cc GetBrokenTests(). Add BitCast bool and DFT rfft/irfft filters to cover the C++ test runner path. ORT BitCast kernel doesn't register bool type, and ORT DFT kernel lacks IRFFT (inverse real FFT) support. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Agent-signed-off: Developer (45720d0d) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- onnxruntime/test/onnx/TestCase.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index fbb9fb2797a88..e9758f1071c85 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -1462,6 +1462,12 @@ std::unique_ptr> GetBrokenTests(const std::string& provider broken_tests->insert({"attention_4d_with_past_and_present_qk_matmul_bias_3d_mask_causal_expanded", "unknown version"}); broken_tests->insert({"attention_4d_with_past_and_present_qk_matmul_bias_4d_mask_causal_expanded", "unknown version"}); broken_tests->insert({"convinteger_with_padding", "unknown version"}); + // Fails since ONNX==1.21.0 + broken_tests->insert({"dft_irfft", "unknown version"}); + broken_tests->insert({"dft_irfft_opset19", "unknown version"}); + broken_tests->insert({"dft_rfft", "unknown version"}); + broken_tests->insert({"dft_rfft_opset19", "unknown version"}); + broken_tests->insert({"bitcast_bool_to_uint8", "ORT BitCast kernel does not register bool type"}); } #ifdef DISABLE_CONTRIB_OPS From aed5d3d154a1a3a710c1edc2596a1ddceb2f0ee4 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 12 Mar 2026 22:00:56 +0000 Subject: [PATCH 10/18] Patch ONNX Slice shape inference for dim_value==0 UB ONNX 1.21.0rc2 enables _GLIBCXX_ASSERTIONS (onnx/onnx#7601) which exposes pre-existing undefined behavior in Slice shape inference: std::clamp(start, 0, dim_value-1) with dim_value=0 violates lo<=hi. Add early-exit guard for both opset 10 and 11 locations in old.cc. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/patches/onnx/onnx.patch | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch index 0a5680778790b..e8e29f99a7cf1 100644 --- a/cmake/patches/onnx/onnx.patch +++ b/cmake/patches/onnx/onnx.patch @@ -82,3 +82,37 @@ index a6a8a83..153da87 100644 .SetDoc(GroupNormalization_ver18_doc) .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f) .Attr( +diff --git a/onnx/defs/tensor/old.cc b/onnx/defs/tensor/old.cc +index c2ba43b7c..c0f229c6e 100644 +--- a/onnx/defs/tensor/old.cc ++++ b/onnx/defs/tensor/old.cc +@@ -2632,6 +2632,16 @@ ONNX_OPERATOR_SET_SCHEMA( + + const auto input_dim_value = input_dim.dim_value(); + ++ // empty dimension: output is always 0 regardless of starts/ends/steps ++ if (input_dim_value == 0) { ++ ctx.getOutputType(0) ++ ->mutable_tensor_type() ++ ->mutable_shape() ++ ->mutable_dim(static_cast(axis)) ++ ->set_dim_value(0); ++ continue; ++ } ++ + // process step + auto step = steps[axis_index]; + if (step == 0) { +@@ -6315,6 +6325,12 @@ ONNX_OPERATOR_SET_SCHEMA( + + const auto input_dim_value = input_dim.dim_value(); + ++ // empty dimension: output is always 0 regardless of starts/ends/steps ++ if (input_dim_value == 0) { ++ ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->mutable_dim(axis)->set_dim_value(0); ++ continue; ++ } ++ + // process step + auto step = steps[axis_index]; + if (step == 0) { From 1d32aea331098fb959425c803f094becb1668f61 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 12 Mar 2026 22:03:06 +0000 Subject: [PATCH 11/18] Sync Slice dim_value==0 fix to vcpkg binskim.patch The onnx.patch fix must also be in binskim.patch for Windows CI builds. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/vcpkg-ports/onnx/binskim.patch | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cmake/vcpkg-ports/onnx/binskim.patch b/cmake/vcpkg-ports/onnx/binskim.patch index 0a5680778790b..e8e29f99a7cf1 100644 --- a/cmake/vcpkg-ports/onnx/binskim.patch +++ b/cmake/vcpkg-ports/onnx/binskim.patch @@ -82,3 +82,37 @@ index a6a8a83..153da87 100644 .SetDoc(GroupNormalization_ver18_doc) .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f) .Attr( +diff --git a/onnx/defs/tensor/old.cc b/onnx/defs/tensor/old.cc +index c2ba43b7c..c0f229c6e 100644 +--- a/onnx/defs/tensor/old.cc ++++ b/onnx/defs/tensor/old.cc +@@ -2632,6 +2632,16 @@ ONNX_OPERATOR_SET_SCHEMA( + + const auto input_dim_value = input_dim.dim_value(); + ++ // empty dimension: output is always 0 regardless of starts/ends/steps ++ if (input_dim_value == 0) { ++ ctx.getOutputType(0) ++ ->mutable_tensor_type() ++ ->mutable_shape() ++ ->mutable_dim(static_cast(axis)) ++ ->set_dim_value(0); ++ continue; ++ } ++ + // process step + auto step = steps[axis_index]; + if (step == 0) { +@@ -6315,6 +6325,12 @@ ONNX_OPERATOR_SET_SCHEMA( + + const auto input_dim_value = input_dim.dim_value(); + ++ // empty dimension: output is always 0 regardless of starts/ends/steps ++ if (input_dim_value == 0) { ++ ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->mutable_dim(axis)->set_dim_value(0); ++ continue; ++ } ++ + // process step + auto step = steps[axis_index]; + if (step == 0) { From b691c5250c70da2564062c456eec8f532f6fb34c Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 12 Mar 2026 22:27:26 +0000 Subject: [PATCH 12/18] Add Slice dim_value==0 fix for defs.cc (opset 13) Covers the third std::clamp UB location in processSliceInputs. All three sites now patched: old.cc:2646, old.cc:6329, defs.cc:792. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/patches/onnx/onnx.patch | 17 +++++++++++++++++ cmake/vcpkg-ports/onnx/binskim.patch | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch index e8e29f99a7cf1..925669ae2d719 100644 --- a/cmake/patches/onnx/onnx.patch +++ b/cmake/patches/onnx/onnx.patch @@ -116,3 +116,20 @@ index c2ba43b7c..c0f229c6e 100644 // process step auto step = steps[axis_index]; if (step == 0) { +diff --git a/onnx/defs/tensor/defs.cc b/onnx/defs/tensor/defs.cc +index 4e0c5f913..a7cd6171a 100644 +--- a/onnx/defs/tensor/defs.cc ++++ b/onnx/defs/tensor/defs.cc +@@ -794,6 +794,12 @@ static void processSliceInputs(const int64_t input_rank, int64_t& start, int64_ + if (step == 0) { + fail_shape_inference("'step' cannot be 0 for Slice"); + } ++ // empty dimension: start=0, end=0 so caller computes output_dim=0 ++ if (input_rank == 0) { ++ start = 0; ++ end = 0; ++ return; ++ } + // process start + if (start < 0) + start += input_rank; diff --git a/cmake/vcpkg-ports/onnx/binskim.patch b/cmake/vcpkg-ports/onnx/binskim.patch index e8e29f99a7cf1..925669ae2d719 100644 --- a/cmake/vcpkg-ports/onnx/binskim.patch +++ b/cmake/vcpkg-ports/onnx/binskim.patch @@ -116,3 +116,20 @@ index c2ba43b7c..c0f229c6e 100644 // process step auto step = steps[axis_index]; if (step == 0) { +diff --git a/onnx/defs/tensor/defs.cc b/onnx/defs/tensor/defs.cc +index 4e0c5f913..a7cd6171a 100644 +--- a/onnx/defs/tensor/defs.cc ++++ b/onnx/defs/tensor/defs.cc +@@ -794,6 +794,12 @@ static void processSliceInputs(const int64_t input_rank, int64_t& start, int64_ + if (step == 0) { + fail_shape_inference("'step' cannot be 0 for Slice"); + } ++ // empty dimension: start=0, end=0 so caller computes output_dim=0 ++ if (input_rank == 0) { ++ start = 0; ++ end = 0; ++ return; ++ } + // process start + if (start < 0) + start += input_rank; From d05c769293517e37e95316adcebf49a055b4d61c Mon 
Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 16 Mar 2026 18:58:36 +0000 Subject: [PATCH 13/18] Update ONNX to 1.21.0rc3 - Update cmake/deps.txt: commit hash and SHA1 for rc3 zip - Update cmake/external/onnx submodule to rc3 commit (e6c12c5fa) - Update cmake/vcpkg-ports/onnx/portfile.cmake: REF and SHA512 - Update onnx==1.21.0rc3 in all 7 requirements.txt files - Verified all vcpkg patches (binskim, fix-cmakelists, fix-dependency-protobuf) and cmake/patches/onnx/onnx.patch apply cleanly to rc3 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Agent-signed-off: Developer (257e49bb) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/deps.txt | 2 +- cmake/external/onnx | 2 +- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- onnxruntime/test/python/requirements.txt | 2 +- .../inference/aarch64/python/cpu/scripts/requirements.txt | 2 +- .../github/linux/docker/scripts/lort/requirements.txt | 2 +- .../github/linux/docker/scripts/manylinux/requirements.txt | 2 +- tools/ci_build/github/linux/docker/scripts/requirements.txt | 2 +- tools/ci_build/github/linux/python/requirements.txt | 2 +- tools/ci_build/github/windows/python/requirements.txt | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 9ee8dc85a4ffc..b7d233d5b19a2 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 -onnx;https://github.com/onnx/onnx/archive/a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5.zip;9f250d23582b974a1bd5119ada7306a298bc6411 
+onnx;https://github.com/onnx/onnx/archive/e6c12c5fa7857729e081e2ec90f96dfefeb79b83.zip;de083cbccbd6e427e94deb263c63aa474059fd01 # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/external/onnx b/cmake/external/onnx index a51ac0754e0f6..e6c12c5fa7857 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5 +Subproject commit e6c12c5fa7857729e081e2ec90f96dfefeb79b83 diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index b411799a647a4..c590d8bc70b4f 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF a51ac0754e0f61d3a4fa70a3821aeaeb740ac7a5 - SHA512 e29c75cf22fea46f659f03cff470e0aecc22fbe3ee4baf082208371a192c6fa58c2080391d4959b95db12b239eb329e215020c85a40be756a2a44fa5b375cb6f + REF e6c12c5fa7857729e081e2ec90f96dfefeb79b83 + SHA512 f54b7020486a80ab4942d9e6aa9f7c393ca16b6a51717c7bd81cfbaf3cd9cbf8c45281f3bf55388dcf16885d8f0b8ae5656d8b5a3af15757aad7fa2f372a2be2 PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch diff --git a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index 58a0214d4f0c8..42c5f0c12450e 100644 --- a/onnxruntime/test/python/requirements.txt +++ b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.21.0rc2 +onnx==1.21.0rc3 pytest onnx-ir diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index 
97a98c32f7aaf..cc94f92fb57d6 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.21.0rc2 +onnx==1.21.0rc3 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index 5428b0253a9d1..0207f6a3cf6f1 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.21.0rc2 +onnx==1.21.0rc3 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index ea9fe01438082..8eaa5d326975a 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.21.0rc2 +onnx==1.21.0rc3 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 392956c4d0b21..c654621f00879 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.21.0rc2 +onnx==1.21.0rc3 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 9f9028ea37459..71642228fee80 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 
+12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.21.0rc2 +onnx==1.21.0rc3 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index 5f1e3b9b19c0b..eae136cf452bd 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.21.0rc2 +onnx==1.21.0rc3 From d43772b5972d74c8efa5140b49af4374ea4e96bd Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 16 Mar 2026 19:07:18 +0000 Subject: [PATCH 14/18] Remove Slice dim_value==0 patch hunks already in ONNX rc3 The Slice shape inference fix for dim_value==0 (tensor/old.cc and tensor/defs.cc) was cherry-picked into ONNX rc3 natively (commit 33afebf43, PR #7739). The parameter was also renamed from 'input_rank' to 'input_dim_size_or_value'. Remove these 3 hunks from both onnx.patch and binskim.patch to prevent build failures. Retained hunks: CMakeLists ONNX_MINIMAL_BUILD, Utils.cmake protobuf warnings, GroupNormalization Deprecate removal. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Agent-signed-off: Developer (257e49bb) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/patches/onnx/onnx.patch | 51 ---------------------------- cmake/vcpkg-ports/onnx/binskim.patch | 51 ---------------------------- 2 files changed, 102 deletions(-) diff --git a/cmake/patches/onnx/onnx.patch b/cmake/patches/onnx/onnx.patch index 925669ae2d719..0a5680778790b 100644 --- a/cmake/patches/onnx/onnx.patch +++ b/cmake/patches/onnx/onnx.patch @@ -82,54 +82,3 @@ index a6a8a83..153da87 100644 .SetDoc(GroupNormalization_ver18_doc) .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f) .Attr( -diff --git a/onnx/defs/tensor/old.cc b/onnx/defs/tensor/old.cc -index c2ba43b7c..c0f229c6e 100644 ---- a/onnx/defs/tensor/old.cc -+++ b/onnx/defs/tensor/old.cc -@@ -2632,6 +2632,16 @@ ONNX_OPERATOR_SET_SCHEMA( - - const auto input_dim_value = input_dim.dim_value(); - -+ // empty dimension: output is always 0 regardless of starts/ends/steps -+ if (input_dim_value == 0) { -+ ctx.getOutputType(0) -+ ->mutable_tensor_type() -+ ->mutable_shape() -+ ->mutable_dim(static_cast(axis)) -+ ->set_dim_value(0); -+ continue; -+ } -+ - // process step - auto step = steps[axis_index]; - if (step == 0) { -@@ -6315,6 +6325,12 @@ ONNX_OPERATOR_SET_SCHEMA( - - const auto input_dim_value = input_dim.dim_value(); - -+ // empty dimension: output is always 0 regardless of starts/ends/steps -+ if (input_dim_value == 0) { -+ ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->mutable_dim(axis)->set_dim_value(0); -+ continue; -+ } -+ - // process step - auto step = steps[axis_index]; - if (step == 0) { -diff --git a/onnx/defs/tensor/defs.cc b/onnx/defs/tensor/defs.cc -index 4e0c5f913..a7cd6171a 100644 ---- a/onnx/defs/tensor/defs.cc -+++ b/onnx/defs/tensor/defs.cc -@@ -794,6 +794,12 @@ static void processSliceInputs(const int64_t input_rank, 
int64_t& start, int64_ - if (step == 0) { - fail_shape_inference("'step' cannot be 0 for Slice"); - } -+ // empty dimension: start=0, end=0 so caller computes output_dim=0 -+ if (input_rank == 0) { -+ start = 0; -+ end = 0; -+ return; -+ } - // process start - if (start < 0) - start += input_rank; diff --git a/cmake/vcpkg-ports/onnx/binskim.patch b/cmake/vcpkg-ports/onnx/binskim.patch index 925669ae2d719..0a5680778790b 100644 --- a/cmake/vcpkg-ports/onnx/binskim.patch +++ b/cmake/vcpkg-ports/onnx/binskim.patch @@ -82,54 +82,3 @@ index a6a8a83..153da87 100644 .SetDoc(GroupNormalization_ver18_doc) .Attr("epsilon", "The epsilon value to use to avoid division by zero.", AttributeProto::FLOAT, 1e-5f) .Attr( -diff --git a/onnx/defs/tensor/old.cc b/onnx/defs/tensor/old.cc -index c2ba43b7c..c0f229c6e 100644 ---- a/onnx/defs/tensor/old.cc -+++ b/onnx/defs/tensor/old.cc -@@ -2632,6 +2632,16 @@ ONNX_OPERATOR_SET_SCHEMA( - - const auto input_dim_value = input_dim.dim_value(); - -+ // empty dimension: output is always 0 regardless of starts/ends/steps -+ if (input_dim_value == 0) { -+ ctx.getOutputType(0) -+ ->mutable_tensor_type() -+ ->mutable_shape() -+ ->mutable_dim(static_cast(axis)) -+ ->set_dim_value(0); -+ continue; -+ } -+ - // process step - auto step = steps[axis_index]; - if (step == 0) { -@@ -6315,6 +6325,12 @@ ONNX_OPERATOR_SET_SCHEMA( - - const auto input_dim_value = input_dim.dim_value(); - -+ // empty dimension: output is always 0 regardless of starts/ends/steps -+ if (input_dim_value == 0) { -+ ctx.getOutputType(0)->mutable_tensor_type()->mutable_shape()->mutable_dim(axis)->set_dim_value(0); -+ continue; -+ } -+ - // process step - auto step = steps[axis_index]; - if (step == 0) { -diff --git a/onnx/defs/tensor/defs.cc b/onnx/defs/tensor/defs.cc -index 4e0c5f913..a7cd6171a 100644 ---- a/onnx/defs/tensor/defs.cc -+++ b/onnx/defs/tensor/defs.cc -@@ -794,6 +794,12 @@ static void processSliceInputs(const int64_t input_rank, int64_t& start, int64_ - if (step == 0) { 
- fail_shape_inference("'step' cannot be 0 for Slice"); - } -+ // empty dimension: start=0, end=0 so caller computes output_dim=0 -+ if (input_rank == 0) { -+ start = 0; -+ end = 0; -+ return; -+ } - // process start - if (start < 0) - start += input_rank; From 29fcbe3d5d56750ed41dcd293b9f1cc4fc7251ca Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 16 Mar 2026 21:35:31 +0000 Subject: [PATCH 15/18] Parallelize CumProd outer loop with thread pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the 4 sequential outer loops (forward/reverse × exclusive/non-exclusive) with concurrency::ThreadPool::TryBatchParallelFor. Each outer iteration processes an independent slice, making them safe to parallelize. Refactored from sequential pointer arithmetic (input_iter++/output_iter++) to index-based access using base offset = outer * dim * lower_dim_size, which is required for parallel execution where iterations cannot share mutable iterators. Agent-signed-off: Developer (257e49bb) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../core/providers/cpu/math/cumprod.cc | 138 +++++++++++------- 1 file changed, 84 insertions(+), 54 deletions(-) diff --git a/onnxruntime/core/providers/cpu/math/cumprod.cc b/onnxruntime/core/providers/cpu/math/cumprod.cc index 3c0646b3d6b9a..f03b19a7fea0d 100644 --- a/onnxruntime/core/providers/cpu/math/cumprod.cc +++ b/onnxruntime/core/providers/cpu/math/cumprod.cc @@ -9,6 +9,7 @@ #include "core/providers/cpu/tensor/utils.h" #include "core/framework/op_kernel.h" #include "core/framework/tensorprotoutils.h" +#include "core/platform/threadpool.h" namespace onnxruntime { @@ -146,67 +147,96 @@ Status CumProd::Compute(OpKernelContext* ctx) const { const int64_t lower_dim_size = // sizes of the slices we can treat as 1D arrays std::accumulate(input_shape.begin() + axis + 1, input_shape.end(), static_cast(1), std::multiplies()); - if (!reverse_) { - const auto* 
input_iter = input->Data(); - auto* output_iter = output_tensor.MutableData(); - const auto* prev_output_iter = output_iter; + const T* input_data = input->Data(); + T* output_data = output_tensor.MutableData(); + const int64_t slice_size = dim * lower_dim_size; + auto* tp = ctx->GetOperatorThreadPool(); + if (!reverse_) { if (exclusive_) { - for (int64_t outer = 0; outer < upper_dim_count; outer++) { - prev_output_iter = output_iter; - for (int64_t inner = 0; inner < lower_dim_size; inner++) { - *(output_iter++) = static_cast(1); - } - for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { - for (int64_t inner = 0; inner < lower_dim_size; inner++) { - *(output_iter++) = *(prev_output_iter++) * *(input_iter++); - } - } - input_iter += lower_dim_size; - } + concurrency::ThreadPool::TryBatchParallelFor( + tp, static_cast(upper_dim_count), + [&](ptrdiff_t outer) { + const int64_t base = outer * slice_size; + const T* in = input_data + base; + T* out = output_data + base; + + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[inner] = static_cast(1); + } + for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { + const int64_t curr_offset = cum_axis * lower_dim_size; + const int64_t prev_offset = (cum_axis - 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[curr_offset + inner] = out[prev_offset + inner] * in[prev_offset + inner]; + } + } + }, + 0); } else { - for (int64_t outer = 0; outer < upper_dim_count; outer++) { - prev_output_iter = output_iter; - for (int64_t inner = 0; inner < lower_dim_size; inner++) { - *(output_iter++) = *(input_iter++); - } - for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { - for (int64_t inner = 0; inner < lower_dim_size; inner++) { - *(output_iter++) = *(prev_output_iter++) * *(input_iter++); - } - } - } + concurrency::ThreadPool::TryBatchParallelFor( + tp, static_cast(upper_dim_count), + [&](ptrdiff_t outer) { + const int64_t base = outer * slice_size; + const T* in = 
input_data + base; + T* out = output_data + base; + + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[inner] = in[inner]; + } + for (int64_t cum_axis = 1; cum_axis < dim; cum_axis++) { + const int64_t curr_offset = cum_axis * lower_dim_size; + const int64_t prev_offset = (cum_axis - 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[curr_offset + inner] = out[prev_offset + inner] * in[curr_offset + inner]; + } + } + }, + 0); } } else { - const auto* input_iter = input->Data() + input->Shape().Size(); - auto* output_iter = output_tensor.MutableData() + output_shape.Size(); - const auto* prev_output_iter = output_iter; - if (exclusive_) { - for (int64_t outer = upper_dim_count - 1; outer >= 0; outer--) { - prev_output_iter = output_iter; - for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { - *(--output_iter) = static_cast(1); - } - for (int64_t cum_axis = dim - 1; cum_axis > 0; cum_axis--) { - for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { - *(--output_iter) = *(--prev_output_iter) * *(--input_iter); - } - } - input_iter -= lower_dim_size; - } + concurrency::ThreadPool::TryBatchParallelFor( + tp, static_cast(upper_dim_count), + [&](ptrdiff_t outer) { + const int64_t base = outer * slice_size; + const T* in = input_data + base; + T* out = output_data + base; + + const int64_t last_offset = (dim - 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[last_offset + inner] = static_cast(1); + } + for (int64_t cum_axis = dim - 2; cum_axis >= 0; cum_axis--) { + const int64_t curr_offset = cum_axis * lower_dim_size; + const int64_t next_offset = (cum_axis + 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[curr_offset + inner] = out[next_offset + inner] * in[next_offset + inner]; + } + } + }, + 0); } else { - for (int64_t outer = upper_dim_count - 1; outer >= 0; outer--) { - prev_output_iter = output_iter; - for (int64_t 
inner = lower_dim_size - 1; inner >= 0; inner--) { - *(--output_iter) = *(--input_iter); - } - for (int64_t cum_axis = dim - 1; cum_axis > 0; cum_axis--) { - for (int64_t inner = lower_dim_size - 1; inner >= 0; inner--) { - *(--output_iter) = *(--prev_output_iter) * *(--input_iter); - } - } - } + concurrency::ThreadPool::TryBatchParallelFor( + tp, static_cast(upper_dim_count), + [&](ptrdiff_t outer) { + const int64_t base = outer * slice_size; + const T* in = input_data + base; + T* out = output_data + base; + + const int64_t last_offset = (dim - 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[last_offset + inner] = in[last_offset + inner]; + } + for (int64_t cum_axis = dim - 2; cum_axis >= 0; cum_axis--) { + const int64_t curr_offset = cum_axis * lower_dim_size; + const int64_t next_offset = (cum_axis + 1) * lower_dim_size; + for (int64_t inner = 0; inner < lower_dim_size; inner++) { + out[curr_offset + inner] = out[next_offset + inner] * in[curr_offset + inner]; + } + } + }, + 0); } } From ffe98ecbb8260f419149bf104f4fcdc198692910 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 23 Mar 2026 20:06:17 +0000 Subject: [PATCH 16/18] Update ONNX to 1.21.0rc4 Update ONNX dependency from 1.21.0rc3 to 1.21.0rc4 (commit c751ddbce897). RC4 includes bug fixes (Slice SIGABRT on empty dimensions) and security hardening (ExternalDataInfo attribute injection). 
Changes: - cmake/deps.txt: Updated archive URL and SHA1 hash - cmake/external/onnx: Updated submodule to rc4 commit - cmake/vcpkg-ports/onnx/portfile.cmake: Updated REF and SHA512 - 7 requirements.txt files: onnx==1.21.0rc4 Agent-signed-off: Developer (dc55daf6) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/deps.txt | 2 +- cmake/external/onnx | 2 +- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- onnxruntime/test/python/requirements.txt | 2 +- .../inference/aarch64/python/cpu/scripts/requirements.txt | 2 +- .../github/linux/docker/scripts/lort/requirements.txt | 2 +- .../github/linux/docker/scripts/manylinux/requirements.txt | 2 +- tools/ci_build/github/linux/docker/scripts/requirements.txt | 2 +- tools/ci_build/github/linux/python/requirements.txt | 2 +- tools/ci_build/github/windows/python/requirements.txt | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index b7d233d5b19a2..754151ea75d3b 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 -onnx;https://github.com/onnx/onnx/archive/e6c12c5fa7857729e081e2ec90f96dfefeb79b83.zip;de083cbccbd6e427e94deb263c63aa474059fd01 +onnx;https://github.com/onnx/onnx/archive/c751ddbce897302ab57802ec506a3ee0e41ae717.zip;0a7cdc4f43478098bbd44bf2e7a99a95d9e9c809 # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/external/onnx b/cmake/external/onnx index e6c12c5fa7857..c751ddbce8973 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit e6c12c5fa7857729e081e2ec90f96dfefeb79b83 +Subproject commit c751ddbce897302ab57802ec506a3ee0e41ae717 diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index c590d8bc70b4f..cc59eddc6642c 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF e6c12c5fa7857729e081e2ec90f96dfefeb79b83 - SHA512 f54b7020486a80ab4942d9e6aa9f7c393ca16b6a51717c7bd81cfbaf3cd9cbf8c45281f3bf55388dcf16885d8f0b8ae5656d8b5a3af15757aad7fa2f372a2be2 + REF c751ddbce897302ab57802ec506a3ee0e41ae717 + SHA512 b23c6ee83334b8b19db7106e18327546e0630f799f5b6355febf5d3c6d59eae133c9a7b1a2e6b35765ddde9478d32596af4c914891381b4018a82b665d59db64 PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch diff --git a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index 42c5f0c12450e..361879108bebd 100644 --- a/onnxruntime/test/python/requirements.txt +++ b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.21.0rc3 +onnx==1.21.0rc4 pytest onnx-ir diff --git a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index cc94f92fb57d6..39ceca68de935 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.21.0rc3 +onnx==1.21.0rc4 diff 
--git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index 0207f6a3cf6f1..8c17b02dc25ea 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.21.0rc3 +onnx==1.21.0rc4 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 8eaa5d326975a..1a75a90ea6024 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.21.0rc3 +onnx==1.21.0rc4 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index c654621f00879..3228a052ae885 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.21.0rc3 +onnx==1.21.0rc4 diff --git a/tools/ci_build/github/linux/python/requirements.txt b/tools/ci_build/github/linux/python/requirements.txt index 71642228fee80..9eec8e52cc45d 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 +12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.21.0rc3 +onnx==1.21.0rc4 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index eae136cf452bd..24639d52e7ebe 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ 
b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.21.0rc3 +onnx==1.21.0rc4 From ff7b87e3094fc837e708312757fb37c8cccaba46 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Mon, 30 Mar 2026 19:42:42 +0000 Subject: [PATCH 17/18] Update ONNX to official 1.21.0 release Update from rc4 to the official ONNX 1.21.0 release (tagged v1.21.0, commit be2b5fde82d9c8874f3d19328bdfe3b6962dc67b, March 27 2026). - cmake/deps.txt: commit hash + SHA1 - cmake/external/onnx: submodule to v1.21.0 - cmake/vcpkg-ports/onnx/portfile.cmake: REF + SHA512 - 7 requirements.txt files: onnx==1.21.0rc4 -> onnx==1.21.0 - All 4 patches verified to apply cleanly Agent-signed-off: Developer (257e49bb) [claude-opus-4.6] Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cmake/deps.txt | 2 +- cmake/external/onnx | 2 +- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- onnxruntime/test/python/requirements.txt | 2 +- .../inference/aarch64/python/cpu/scripts/requirements.txt | 2 +- .../github/linux/docker/scripts/lort/requirements.txt | 2 +- .../github/linux/docker/scripts/manylinux/requirements.txt | 2 +- tools/ci_build/github/linux/docker/scripts/requirements.txt | 2 +- tools/ci_build/github/linux/python/requirements.txt | 2 +- tools/ci_build/github/windows/python/requirements.txt | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 754151ea75d3b..95398b1a31f20 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 
-onnx;https://github.com/onnx/onnx/archive/c751ddbce897302ab57802ec506a3ee0e41ae717.zip;0a7cdc4f43478098bbd44bf2e7a99a95d9e9c809 +onnx;https://github.com/onnx/onnx/archive/be2b5fde82d9c8874f3d19328bdfe3b6962dc67b.zip;451dd6ad7ffafc76ab26c9508adaa91d935f10ba # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/external/onnx b/cmake/external/onnx index c751ddbce8973..be2b5fde82d9c 160000 --- a/cmake/external/onnx +++ b/cmake/external/onnx @@ -1 +1 @@ -Subproject commit c751ddbce897302ab57802ec506a3ee0e41ae717 +Subproject commit be2b5fde82d9c8874f3d19328bdfe3b6962dc67b diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index cc59eddc6642c..2e372e048d8a1 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF c751ddbce897302ab57802ec506a3ee0e41ae717 - SHA512 b23c6ee83334b8b19db7106e18327546e0630f799f5b6355febf5d3c6d59eae133c9a7b1a2e6b35765ddde9478d32596af4c914891381b4018a82b665d59db64 + REF be2b5fde82d9c8874f3d19328bdfe3b6962dc67b + SHA512 486eb73a6e66f0d39bde2e5fbbe69ef2f6db46573714224824ee854d9a553ff47d642eb0526cd173c651e5f30093fcb0f97b4e0441ad77818cf0bc98ae422ad7 PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch diff --git a/onnxruntime/test/python/requirements.txt b/onnxruntime/test/python/requirements.txt index 361879108bebd..3ece2f39d4042 100644 --- a/onnxruntime/test/python/requirements.txt +++ b/onnxruntime/test/python/requirements.txt @@ -1,3 +1,3 @@ -onnx==1.21.0rc4 +onnx==1.21.0 pytest onnx-ir diff --git 
a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt index 39ceca68de935..b4c2f163e22ac 100644 --- a/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/inference/aarch64/python/cpu/scripts/requirements.txt @@ -7,4 +7,4 @@ wheel protobuf==4.25.8 sympy==1.14 flatbuffers -onnx==1.21.0rc4 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt index 8c17b02dc25ea..eb52681341012 100644 --- a/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/lort/requirements.txt @@ -3,7 +3,7 @@ beartype==0.15.0 flatbuffers cerberus h5py -onnx==1.21.0rc4 +onnx==1.21.0 # Python dependencies required for pytorch development astunparse expecttest!=0.2.0 diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt index 1a75a90ea6024..9a0a6d0f51900 100644 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt @@ -9,4 +9,4 @@ sympy==1.14 flatbuffers neural-compressor>=2.2.1 triton==3.5.0 -onnx==1.21.0rc4 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt index 3228a052ae885..3d886832e1ccb 100644 --- a/tools/ci_build/github/linux/docker/scripts/requirements.txt +++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt @@ -12,4 +12,4 @@ protobuf==6.33.0 packaging onnxscript==0.6.2 onnx-ir==0.1.16 -onnx==1.21.0rc4 +onnx==1.21.0 diff --git a/tools/ci_build/github/linux/python/requirements.txt 
b/tools/ci_build/github/linux/python/requirements.txt index 9eec8e52cc45d..bfe9ab0d8a508 100644 --- a/tools/ci_build/github/linux/python/requirements.txt +++ b/tools/ci_build/github/linux/python/requirements.txt @@ -12,4 +12,4 @@ onnxscript==0.6.2 onnx-ir==0.1.16 jinja2 markupsafe -onnx==1.21.0rc4 +onnx==1.21.0 diff --git a/tools/ci_build/github/windows/python/requirements.txt b/tools/ci_build/github/windows/python/requirements.txt index 24639d52e7ebe..2dfba37c6f381 100644 --- a/tools/ci_build/github/windows/python/requirements.txt +++ b/tools/ci_build/github/windows/python/requirements.txt @@ -14,4 +14,4 @@ jinja2 markupsafe semver packaging -onnx==1.21.0rc4 +onnx==1.21.0 From ea5315e0ee4d14d3baef87423bed62aa60e14a11 Mon Sep 17 00:00:00 2001 From: Ti-Tai Wang Date: Thu, 2 Apr 2026 20:06:42 +0000 Subject: [PATCH 18/18] fix SHA --- cmake/deps.txt | 2 +- cmake/vcpkg-ports/onnx/portfile.cmake | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/deps.txt b/cmake/deps.txt index 95398b1a31f20..3ab9b81663f87 100644 --- a/cmake/deps.txt +++ b/cmake/deps.txt @@ -34,7 +34,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.250325.1.zip;826c8bd47c2258ec61b8b218e031e5b33d27f761 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063 -onnx;https://github.com/onnx/onnx/archive/be2b5fde82d9c8874f3d19328bdfe3b6962dc67b.zip;451dd6ad7ffafc76ab26c9508adaa91d935f10ba +onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.21.0.zip;321d4acc807c8e0fb0bbcc0424a143dffde1e846 # Use the latest commit of 10.9-GA onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/d5dce67db7c2e64b07e055571f5ec06f7f254de2.zip;01114d3b67650857281fa50faa2e412130a63b69 
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa diff --git a/cmake/vcpkg-ports/onnx/portfile.cmake b/cmake/vcpkg-ports/onnx/portfile.cmake index 2e372e048d8a1..3450dcb2e80ce 100644 --- a/cmake/vcpkg-ports/onnx/portfile.cmake +++ b/cmake/vcpkg-ports/onnx/portfile.cmake @@ -3,8 +3,8 @@ vcpkg_check_linkage(ONLY_STATIC_LIBRARY) vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO onnx/onnx - REF be2b5fde82d9c8874f3d19328bdfe3b6962dc67b - SHA512 486eb73a6e66f0d39bde2e5fbbe69ef2f6db46573714224824ee854d9a553ff47d642eb0526cd173c651e5f30093fcb0f97b4e0441ad77818cf0bc98ae422ad7 + REF "v${VERSION}" + SHA512 3cee4c0fbc9e260e360a62a59e324e0b127a5749f958e0704989b407a4c1179c637ef86e41a406e7868537a62a11a821e3433005eb0725f979145f8d514926bd PATCHES fix-cmakelists.patch fix-dependency-protobuf.patch