diff --git a/docs/How_To_Update_ONNX_Dev_Notes.md b/docs/How_To_Update_ONNX_Dev_Notes.md index 199e6671f6a1a..997812d7e7acf 100644 --- a/docs/How_To_Update_ONNX_Dev_Notes.md +++ b/docs/How_To_Update_ONNX_Dev_Notes.md @@ -36,10 +36,10 @@ This file should be generated. See [cgmanifests/README](/cgmanifests/README.md) 1. If there is a build failure in stage "Check out of dated documents" in WebAssembly CI pipeline, update ONNX Runtime Web WebGL operator support document: - Make sure Node.js is installed (see [Prerequisites](../js/README.md#Prerequisites) for instructions). - - Follow step 1 in [js/Build](../js/README.md#Build-2) to install dependencies). + - Follow [js/Build](../js/README.md#Build-2) to install dependencies. - Follow instructions in [Generate document](../js/README.md#Generating-Document) to update document. Commit changes applied to file `docs/operators.md`. -1. Usually some newly introduced tests will fail. Then you may need to update +2. Usually some newly introduced tests will fail. Then you may need to update - [onnxruntime/test/onnx/main.cc](/onnxruntime/test/onnx/main.cc) - [onnxruntime/test/providers/cpu/model_tests.cc](/onnxruntime/test/providers/cpu/model_tests.cc) - [csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs](/csharp/test/Microsoft.ML.OnnxRuntime.Tests.NetCoreApp/InferenceTest.netcore.cs) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index eeb8ebb3ccefe..d6053d258b029 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -19,8 +19,10 @@ Do not modify directly.* |**Operator Domain:** *ai.onnx*|||| |Abs|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[6, 12]|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Acos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Acosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| +|Acos|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Acosh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| |Add|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| @@ -33,11 +35,16 @@ Do not modify directly.* |ArgMin|*in* data:**T**
*out* reduced:**tensor(int64)**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[11, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| |||[1, 10]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(int8), tensor(uint8)| -|Asin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Asinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| -|Atan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Atanh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| -|AveragePool|*in* X:**T**
*out* Y:**T**|19+|**T** = tensor(float)| +|Asin|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Asinh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| +|Atan|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Atanh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| +|AveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[19, 21]|**T** = tensor(float)| |||[11, 18]|**T** = tensor(float)| |||10|**T** = tensor(float)| |||[7, 9]|**T** = tensor(float)| @@ -72,13 +79,17 @@ Do not modify directly.* |ConstantOfShape|*in* input:**T1**
*out* output:**T2**|21+|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||20|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[9, 19]|**T1** = tensor(int64)
**T2** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float)| +|Conv|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| |ConvInteger|*in* x:**T1**
*in* w:**T2**
*in* x_zero_point:**T1**
*in* w_zero_point:**T2**
*out* y:**T3**|10+|**T1** = tensor(uint8)
**T2** = tensor(uint8)
**T3** = tensor(int32)| -|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|11+|**T** = tensor(float)| +|ConvTranspose|*in* X:**T**
*in* W:**T**
*in* B:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| -|Cos|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| -|Cosh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| +|Cos|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| +|Cosh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| |Crop|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |CumSum|*in* x:**T**
*in* axis:**T2**
*out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| |||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T2** = tensor(int32), tensor(int64)| @@ -91,18 +102,21 @@ Do not modify directly.* |||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float), tensor(float16)| |||[13, 18]|**T** = tensor(int32), tensor(int8), tensor(uint8)| |||[10, 12]|**T** = tensor(int32), tensor(int8), tensor(uint8)| -|Det|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(float)| +|Det|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[11, 21]|**T** = tensor(float)| |Div|*in* A:**T**
*in* B:**T**
*out* C:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||13|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| |||[7, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|13+|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| +|Dropout|*in* data:**T**
*in* ratio:**T1**
*in* training_mode:**T2**
*out* output:**T**
*out* mask:**T2**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T**

or

*in* data:**T**
*out* output:**T**
*out* mask:**T1**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| +|||[13, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |||12|**T** = tensor(double), tensor(float)
**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |||[10, 11]|**T** = tensor(double), tensor(float), tensor(float16)
**T1** = tensor(bool)| |||[7, 9]|**T** = tensor(double), tensor(float), tensor(float16)| |DynamicQuantizeLinear|*in* x:**T1**
*out* y:**T2**
*out* y_scale:**tensor(float)**
*out* y_zero_point:**T2**|11+|**T2** = tensor(uint8)| |DynamicSlice|*in* data:**T**
*in* starts:**Tind**
*in* ends:**Tind**
*in* axes:**Tind**
*out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| |Einsum|*in* Inputs:**T**
*out* Output:**T**|12+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)| -|Elu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|Elu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |Equal|*in* A:**T**
*in* B:**T**
*out* C:**T1**|19+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(string)
**T1** = tensor(bool)| |||[13, 18]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[11, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| @@ -113,7 +127,8 @@ Do not modify directly.* |||[6, 12]|**T** = tensor(double), tensor(float)| |Expand|*in* input:**T**
*in* shape:**tensor(int64)**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[8, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|EyeLike|*in* input:**T1**
*out* output:**T2**|9+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| +|EyeLike|*in* input:**T1**
*out* output:**T2**|22+|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| +|||[9, 21]|**T1** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)
**T2** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint64)| |Flatten|*in* input:**T**
*out* output:**T**|21+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[13, 20]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| @@ -121,7 +136,8 @@ Do not modify directly.* |||[1, 8]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |Floor|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float)| |||[6, 12]|**T** = tensor(double), tensor(float)| -|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|GRU|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |Gather|*in* data:**T**
*in* indices:**Tind**
*out* output:**T**|13+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**Tind** = tensor(int32), tensor(int64)| @@ -136,19 +152,23 @@ Do not modify directly.* |||[11, 12]|**T** = tensor(double), tensor(float)| |||[9, 10]|**T** = tensor(double), tensor(float)| |||[7, 8]|**T** = tensor(double), tensor(float)| -|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| +|GlobalAveragePool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |GlobalLpPool|*in* X:**T**
*out* Y:**T**|2+|**T** = tensor(float)| -|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| +|GlobalMaxPool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |Greater|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[9, 12]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[7, 8]|**T** = tensor(double), tensor(float)
**T1** = tensor(bool)| |GreaterOrEqual|*in* A:**T**
*in* B:**T**
*out* C:**T1**|16+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| |||[12, 15]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)
**T1** = tensor(bool)| -|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|20+|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| +|GridSample|*in* X:**T1**
*in* grid:**T2**
*out* Y:**T1**|22+|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| +|||[20, 21]|**T1** = tensor(double), tensor(float)
**T2** = tensor(double), tensor(float)| |||[16, 19]|**T1** = tensor(float)
**T2** = tensor(float)| |HammingWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |HannWindow|*in* size:**T1**
*out* output:**T2**|17+|**T1** = tensor(int32), tensor(int64)
**T2** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|HardSigmoid|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|HardSigmoid|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |Hardmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float)| |||[11, 12]|**T** = tensor(float)| |||[1, 10]|**T** = tensor(float)| @@ -165,7 +185,8 @@ Do not modify directly.* |||[11, 12]|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**B** = tensor(bool)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |ImageScaler|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| -|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|6+|**T** = tensor(float)| +|InstanceNormalization|*in* input:**T**
*in* scale:**T**
*in* B:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |IsInf|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)
**T2** = tensor(bool)| |||[10, 19]|**T1** = tensor(double), tensor(float)
**T2** = tensor(bool)| |IsNaN|*in* X:**T1**
*out* Y:**T2**|20+|**T1** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz)
**T2** = tensor(bool)| @@ -173,7 +194,8 @@ Do not modify directly.* |||[9, 12]|**T1** = tensor(double), tensor(float), tensor(float16)
**T2** = tensor(bool)| |LRN|*in* X:**T**
*out* Y:**T**|13+|**T** = tensor(float)| |||[1, 12]|**T** = tensor(float)| -|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|14+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|22+|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(double), tensor(float)
**T1** = tensor(int32)| |LayerNormalization|*in* X:**T**
*in* Scale:**T**
*in* B:**T**
*out* Y:**T**
*out* Mean:**U**
*out* InvStdDev:**U**

or

*in* X:**T**
*in* Scale:**V**
*in* B:**V**
*out* Y:**V**
*out* Mean:**U**
*out* InvStdDev:**U**|17+|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(float)| |||[1, 16]|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(double), tensor(float), tensor(float16)
**V** = tensor(double), tensor(float), tensor(float16)| @@ -196,7 +218,8 @@ Do not modify directly.* |||[11, 12]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[1, 10]|**B** = tensor(bool)
**I** = tensor(int64)
**V** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |LpNormalization|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(double), tensor(float)| -|LpPool|*in* X:**T**
*out* Y:**T**|18+|**T** = tensor(float)| +|LpPool|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[18, 21]|**T** = tensor(float)| |||[11, 17]|**T** = tensor(float)| |||[2, 10]|**T** = tensor(float)| |MatMul|*in* A:**T**
*in* B:**T**
*out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| @@ -207,11 +230,13 @@ Do not modify directly.* |||12|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)| |||[8, 11]|**T** = tensor(double), tensor(float)| |||[6, 7]|**T** = tensor(float)| -|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|12+|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| +|MaxPool|*in* X:**T**
*out* Y:**T**

or

*in* X:**T**
*out* Y:**T**
*out* Indices:**I**|22+|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| +|||[12, 21]|**I** = tensor(int64)
**T** = tensor(double), tensor(float), tensor(int8), tensor(uint8)| |||[8, 11]|**I** = tensor(int64)
**T** = tensor(double), tensor(float)| |||[1, 7]|**T** = tensor(float)| |MaxRoiPool|*in* X:**T**
*in* rois:**T**
*out* Y:**T**|1+|**T** = tensor(float)| -|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|11+|**T1** = tensor(float)
**T2** = tensor(int64)| +|MaxUnpool|*in* X:**T1**
*in* I:**T2**
*in* output_shape:**T2**
*out* output:**T1**|22+|**T1** = tensor(float)
**T2** = tensor(int64)| +|||[11, 21]|**T1** = tensor(float)
**T2** = tensor(int64)| |||[9, 10]|**T1** = tensor(float)
**T2** = tensor(int64)| |Mean|*in* data_0:**T**
*out* mean:**T**|13+|**T** = tensor(float)| |||[8, 12]|**T** = tensor(float)| @@ -264,7 +289,8 @@ Do not modify directly.* |||[19, 20]|**T1** = tensor(float), tensor(float16)
**T2** = tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(int8), tensor(uint8)| |||[13, 18]|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| |||[10, 12]|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| -|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|14+|**T** = tensor(float)
**T1** = tensor(int32)| +|RNN|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*out* Y:**T**
*out* Y_h:**T**|22+|**T** = tensor(float)
**T1** = tensor(int32)| +|||[14, 21]|**T** = tensor(float)
**T1** = tensor(int32)| |||[7, 13]|**T** = tensor(float)
**T1** = tensor(int32)| |RandomNormal|*out* output:**T**|1+|**T** = tensor(double), tensor(float)| |RandomNormalLike|*in* input:**T1**
*out* output:**T2**|1+|**T1** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T2** = tensor(double), tensor(float)| @@ -334,7 +360,8 @@ Do not modify directly.* |ReverseSequence|*in* input:**T**
*in* sequence_lens:**tensor(int64)**
*out* Y:**T**|10+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |RoiAlign|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*out* Y:**T1**|16+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| |||[10, 15]|**T1** = tensor(double), tensor(float)
**T2** = tensor(int64)| -|Round|*in* X:**T**
*out* Y:**T**|11+|**T** = tensor(double), tensor(float), tensor(float16)| +|Round|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(double), tensor(float), tensor(float16)| +|||[11, 21]|**T** = tensor(double), tensor(float), tensor(float16)| |STFT|*in* signal:**T1**
*in* frame_step:**T2**
*in* window:**T1**
*in* frame_length:**T2**
*out* output:**T1**|17+|**T1** = tensor(double), tensor(float)
**T2** = tensor(int32), tensor(int64)| |Scale|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| |ScaledTanh|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| @@ -353,7 +380,8 @@ Do not modify directly.* |||[16, 17]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[13, 15]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[11, 12]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| -|Selu|*in* X:**T**
*out* Y:**T**|6+|**T** = tensor(float)| +|Selu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[6, 21]|**T** = tensor(float)| |SequenceAt|*in* input_sequence:**S**
*in* position:**I**
*out* tensor:**T**|11+|**I** = tensor(int32), tensor(int64)
**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SequenceConstruct|*in* inputs:**T**
*out* output_sequence:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))
**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SequenceEmpty|*out* output:**S**|11+|**S** = seq(tensor(bfloat16)), seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(string)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8))| @@ -371,8 +399,10 @@ Do not modify directly.* |Sign|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||[9, 12]|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |SimplifiedLayerNormalization|*in* X:**T**
*in* scale:**V**
*out* Y:**V**
*out* inv_std_var:**U**|1+|**T** = tensor(double), tensor(float), tensor(float16)
**U** = tensor(double), tensor(float), tensor(float16)
**V** = tensor(double), tensor(float), tensor(float16)| -|Sin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(double), tensor(float)| -|Sinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float)| +|Sin|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(double), tensor(float)| +|||[7, 21]|**T** = tensor(double), tensor(float)| +|Sinh|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[9, 21]|**T** = tensor(float)| |Size|*in* data:**T**
*out* size:**T1**|21+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[19, 20]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[13, 18]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| @@ -384,8 +414,10 @@ Do not modify directly.* |Softmax|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[11, 12]|**T** = tensor(double), tensor(float)| |||[1, 10]|**T** = tensor(double), tensor(float)| -|Softplus|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| -|Softsign|*in* input:**T**
*out* output:**T**|1+|**T** = tensor(float)| +|Softplus|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| +|Softsign|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[1, 21]|**T** = tensor(float)| |SpaceToDepth|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[1, 12]|**T** = tensor(double), tensor(float)| |Split|*in* input:**T**
*in* split:**T**
*out* outputs...:**T**

or

*in* input:**T**
*in* split:**tensor(int64)**
*out* outputs:**T**

or

*in* input:**T**
*out* outputs:**T**|18+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| @@ -408,11 +440,13 @@ Do not modify directly.* |Sum|*in* data_0:**T**
*out* sum:**T**|13+|**T** = tensor(double), tensor(float)| |||[8, 12]|**T** = tensor(double), tensor(float)| |||[6, 7]|**T** = tensor(double), tensor(float)| -|Tan|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float)| +|Tan|*in* input:**T**
*out* output:**T**|22+|**T** = tensor(float)| +|||[7, 21]|**T** = tensor(float)| |Tanh|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(double), tensor(float)| |||[6, 12]|**T** = tensor(double), tensor(float)| |TfIdfVectorizer|*in* X:**T**
*out* Y:**T1**|9+|**T** = tensor(int32), tensor(int64), tensor(string)
**T1** = tensor(float)| -|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|10+|**T** = tensor(float)| +|ThresholdedRelu|*in* X:**T**
*out* Y:**T**|22+|**T** = tensor(float)| +|||[10, 21]|**T** = tensor(float)| |||[1, 9]|**T** = tensor(float)| |Tile|*in* input:**T**
*in* repeats:**T1**
*out* output:**T**

or

*in* input:**T**
*in* tiles:**T**
*in* axis:**T**
*out* output:**T**|13+|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| |||[6, 12]|**T** = tensor(bool), tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| diff --git a/onnxruntime/core/providers/cpu/activation/activations.cc b/onnxruntime/core/providers/cpu/activation/activations.cc index 049fee4b95308..71cba37b52446 100644 --- a/onnxruntime/core/providers/cpu/activation/activations.cc +++ b/onnxruntime/core/providers/cpu/activation/activations.cc @@ -33,8 +33,10 @@ namespace onnxruntime { op, since_version, type, \ KernelDefBuilder().MayInplace(0, 0).TypeConstraint("T", DataTypeImpl::GetTensorType()), op); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Elu, 6); -REGISTER_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 6); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Elu, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Elu, 22); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(HardSigmoid, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(LeakyRelu, 6, 15); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Relu, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Relu, 6, 12, double); @@ -52,19 +54,23 @@ REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(LeakyRelu, 6, 15, MLFloat16); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(LeakyRelu, 16, MLFloat16); #endif // MLAS_F16VEC_INTRINSICS_SUPPORTED -REGISTER_UNARY_ELEMENTWISE_KERNEL(Selu, 6); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Selu, 6, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Selu, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 6, 12, double); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 13, float); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Sigmoid, 13, double); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Softplus, 1); -REGISTER_UNARY_ELEMENTWISE_KERNEL(Softsign, 1); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Softplus, 1, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Softplus, 22); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(Softsign, 1, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(Softsign, 22); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 6, 12, float); REGISTER_VERSIONED_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 6, 12, double); REGISTER_UNARY_ELEMENTWISE_KERNEL(Celu, 12); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 13, float); REGISTER_UNARY_ELEMENTWISE_TYPED_KERNEL(Tanh, 13, double); -REGISTER_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 10); +REGISTER_VERSIONED_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 10, 21); +REGISTER_UNARY_ELEMENTWISE_KERNEL(ThresholdedRelu, 22); // Opset-16 adds BFloat16 to allowed types for the LeakyRelu operator REGISTER_UNARY_ELEMENTWISE_KERNEL(LeakyRelu, 16); diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc index 0499a15e1df0a..4079f9ffc2292 100644 --- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc +++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc @@ -40,8 +40,8 @@ std::vector CPUExecutionProvider::CreatePreferredAllocators() { // Forward declarations of op kernels class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 10, Clip); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Elu); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, HardSigmoid); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, Elu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, HardSigmoid); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 15, LeakyRelu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Relu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Relu); @@ -49,11 +49,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, MLFloat16, Relu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 15, MLFloat16, LeakyRelu); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, Selu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, Selu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Sigmoid); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Sigmoid); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Softplus); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, Softsign); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, Softplus); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, Softsign); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, float, Tanh); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 12, double, Tanh); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, PRelu); @@ -129,13 +129,13 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 10, double, Equal); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 7, float, Mean); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 8, 12, float, Mean); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, float, Sin); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, double, Sin); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Cos); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Tan); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Asin); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Acos); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, Atan); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, float, Sin); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, double, Sin); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Cos); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Tan); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Asin); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Acos); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 21, Atan); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, float, Gemm); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 7, 8, double, Gemm); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, Hardmax); @@ -154,7 +154,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, Conv); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, ConvTranspose); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 8, Flatten); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, InstanceNormalization); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 6, 21, InstanceNormalization); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, float, LpNormalization); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, double, LpNormalization); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 12, LRN); @@ -166,11 +166,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 2, 10, LpPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 2, GlobalLpPool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, GlobalAveragePool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, GlobalAveragePool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, MLFloat16, GlobalAveragePool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, MLFloat16, GlobalAveragePool); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, GlobalMaxPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 21, GlobalMaxPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, MaxRoiPool); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, float, ReduceL1); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 1, 10, double, ReduceL1); @@ -286,7 +286,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, double, Less); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, int32_t, Less); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, int64_t, Less); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, EyeLike); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, EyeLike); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, float, IsNaN); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, double, IsNaN); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 12, MLFloat16, IsNaN); @@ -316,11 +316,11 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, int64_t_float_int32_t, OneHot); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, MaxUnpool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Sinh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Cosh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Asinh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Acosh); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, Atanh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Sinh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Cosh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Asinh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Acosh); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 21, Atanh); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, Scan); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, 10, Scatter); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 9, TfIdfVectorizer); @@ -362,7 +362,7 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, int32_t, Resize); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, int8_t, Resize); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 10, uint8_t, Resize); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, ThresholdedRelu); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 21, ThresholdedRelu); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, uint8_t, DequantizeLinear); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, 12, int8_t, @@ -401,9 +401,9 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, int64_t, Equal); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, float, Equal); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, double, Equal); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, float, Round); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, double, Round); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MLFloat16, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, float, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, double, Round); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MLFloat16, Round); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, float, ArgMax); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, double, ArgMax); @@ -483,19 +483,19 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Split); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Squeeze); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, Unsqueeze); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Det); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, Det); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, ScatterElements); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, NonMaxSuppression); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 18, AveragePool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MaxUnpool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MaxUnpool); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 17, LpPool); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Conv); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, Conv); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, MLFloat16, Conv); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, MLFloat16, Conv); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 18, MLFloat16, AveragePool); #endif -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, ConvTranspose); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 21, ConvTranspose); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, 12, If); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, SequenceLength); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, SequenceAt); @@ -541,9 +541,9 @@ class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDoma class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, Min); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, Max); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MaxPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 21, MaxPool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, MLFloat16, MaxPool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 21, MLFloat16, MaxPool); #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, Pow); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 12, 12, float, ReduceMax); @@ -633,10 +633,10 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 20, Flatten); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, LRN); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, MeanVarianceNormalization); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float_float, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float_double, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double_float, Dropout); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double_double, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, float_float, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, float_double, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, double_float, Dropout); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, 21, double_double, Dropout); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, float, ArgMax); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, double, ArgMax); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 13, int8_t, ArgMax); @@ -844,9 +844,9 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOn BatchNormalization); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 14, double, BatchNormalization); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, GRU); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, LSTM); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, RNN); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, GRU); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, LSTM); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 14, 21, RNN); // Opset 15 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 15, Pow); @@ -946,7 +946,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, double, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int32_t, ReduceSumSquare); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int64_t, ReduceSumSquare); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, LpPool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, 21, LpPool); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Col2Im); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int8_t, BitwiseAnd); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, int16_t, BitwiseAnd); @@ -992,9 +992,9 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 18, Op // Opset 19 class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, Size); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, AveragePool); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 21, AveragePool); #ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, MLFloat16, AveragePool); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 21, MLFloat16, AveragePool); #endif class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, Cast); class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 19, 20, int32_t, @@ -1062,8 +1062,8 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, int8_t, ReduceMin); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, uint8_t, ReduceMin); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, DFT); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, GridSample); -class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, GridSample); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, 21, float, GridSample); +class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, 21, double, GridSample); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, AffineGrid); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, double, AffineGrid); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 20, float, IsNaN); @@ -1125,6 +1125,56 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 21, Float8E5M2FNUZ, QuantizeLinear); #endif +// Opset 22 +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Acos); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Cos); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Tan); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Asin); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Atan); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Sinh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Cosh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Asinh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Acosh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Atanh); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Conv); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, ConvTranspose); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Det); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float_float, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float_double, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double_float, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double_double, Dropout); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, GridSample); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, GridSample); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Elu); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, EyeLike); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GlobalAveragePool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GlobalMaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, GRU); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, LSTM); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, RNN); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, HardSigmoid); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, InstanceNormalization); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, LpPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MaxPool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MaxUnpool); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Softplus); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, Round); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, Round); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, Round); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Selu); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, float, Sin); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, double, Sin); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, Softsign); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, ThresholdedRelu); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, AveragePool); + +#ifdef MLAS_F16VEC_INTRINSICS_SUPPORTED +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, Conv); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, GlobalAveragePool); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, MaxPool); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 22, MLFloat16, AveragePool); +#endif + // !!PLEASE READ BELOW!! Following that, add new entries above this comment /* *** IMPORTANT! *** @@ -1168,21 +1218,21 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1567,7 +1617,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { int32_t, Less)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1678,7 +1728,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { int8_t, Resize)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -1954,7 +2004,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2081,14 +2131,14 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2389,9 +2439,9 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { float, BatchNormalization)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, // Opset 15 BuildKernelCreateInfo, @@ -2556,7 +2606,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { ReduceSumSquare)>, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2633,7 +2683,7 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { // Opset 19 BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, @@ -2708,10 +2758,10 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, #endif + + // Opset 22 + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, }; for (auto& function_table_entry : function_table) { @@ -2819,18 +2917,26 @@ Status RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) { Status RegisterFp16Kernels(KernelRegistry& kernel_registry) { static const BuildKernelCreateInfoFn function_table[] = { BuildKernelCreateInfo, // default entry to avoid the list become empty after ops-reducing - BuildKernelCreateInfo, + BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, - BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo()), + FusedConvFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( Conv, - 11, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), FusedConvFp16); diff --git a/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc b/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc index 7c1e05f7ce277..9729500d0e1ff 100644 --- a/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc +++ b/onnxruntime/core/providers/cpu/fp16/fp16_pool.cc @@ -224,9 +224,16 @@ ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + MaxPool, + 12, 21, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( MaxPool, - 12, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); @@ -237,16 +244,30 @@ ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + AveragePool, + 19, 21, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + ONNX_CPU_OPERATOR_TYPED_KERNEL( AveragePool, - 19, + 22, + MLFloat16, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + PoolFp16); + +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + GlobalAveragePool, + 1, 21, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); ONNX_CPU_OPERATOR_TYPED_KERNEL( GlobalAveragePool, - 1, + 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), PoolFp16); diff --git a/onnxruntime/core/providers/cpu/math/det.cc b/onnxruntime/core/providers/cpu/math/det.cc index b32b44cf942c8..dc3f11d84393f 100644 --- a/onnxruntime/core/providers/cpu/math/det.cc +++ b/onnxruntime/core/providers/cpu/math/det.cc @@ -13,9 +13,17 @@ using namespace onnxruntime::common; namespace onnxruntime { -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Det, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Det); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Det, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Det); diff --git a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc index a78ff69e5c894..587ed882a2050 100644 --- a/onnxruntime/core/providers/cpu/math/element_wise_ops.cc +++ b/onnxruntime/core/providers/cpu/math/element_wise_ops.cc @@ -1337,16 +1337,30 @@ class Sin final : public OpKernel { } }; +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + Sin, + 7, 21, + float, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Sin); + +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL( + Sin, + 7, 21, + double, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Sin); + ONNX_CPU_OPERATOR_TYPED_KERNEL( Sin, - 7, + 22, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Sin); ONNX_CPU_OPERATOR_TYPED_KERNEL( Sin, - 7, + 22, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Sin); @@ -1365,9 +1379,17 @@ class Cos final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Cos, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Cos); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Cos, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Cos); @@ -1385,9 +1407,15 @@ class Tan final : public OpKernel { } }; +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + Tan, + 7, 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Tan); + ONNX_CPU_OPERATOR_KERNEL( Tan, - 7, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Tan); @@ -1405,9 +1433,17 @@ class Asin final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Asin, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Asin); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Asin, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Asin); @@ -1425,9 +1461,17 @@ class Acos final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Acos, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Acos); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Acos, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Acos); @@ -1445,9 +1489,17 @@ class Atan final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Atan, 7, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Atan); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Atan, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Atan); @@ -1465,9 +1517,17 @@ class Sinh final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Sinh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Sinh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Sinh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Sinh); @@ -1485,9 +1545,17 @@ class Cosh final : public OpKernel { } }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Cosh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Cosh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Cosh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Cosh); @@ -1517,9 +1585,17 @@ class Asinh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Asinh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Asinh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Asinh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Asinh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Asinh); @@ -1549,9 +1625,17 @@ class Acosh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Acosh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Acosh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Acosh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Acosh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Acosh); @@ -1581,9 +1665,17 @@ class Atanh final : public OpKernel { ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(Atanh); }; -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Atanh, 9, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Atanh); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Atanh, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Atanh); diff --git a/onnxruntime/core/providers/cpu/math/round.cc b/onnxruntime/core/providers/cpu/math/round.cc index 86c2b91c10535..86be7cce43e9b 100644 --- a/onnxruntime/core/providers/cpu/math/round.cc +++ b/onnxruntime/core/providers/cpu/math/round.cc @@ -12,9 +12,13 @@ namespace onnxruntime { -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); -ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 11, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); +ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(Round, 11, 21, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); + +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, MLFloat16, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, float, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); +ONNX_CPU_OPERATOR_TYPED_KERNEL(Round, 22, double, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Round); template Status Round::Compute(OpKernelContext* ctx) const { diff --git a/onnxruntime/core/providers/cpu/nn/Unpool.cc b/onnxruntime/core/providers/cpu/nn/Unpool.cc index fc5744e24cea6..5997dcedebfd7 100644 --- a/onnxruntime/core/providers/cpu/nn/Unpool.cc +++ b/onnxruntime/core/providers/cpu/nn/Unpool.cc @@ -23,9 +23,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T2", DataTypeImpl::GetTensorType()), MaxUnpool); +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + MaxUnpool, + 11, 21, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + MaxUnpool); + ONNX_CPU_OPERATOR_KERNEL( MaxUnpool, - 11, + 22, KernelDefBuilder() .TypeConstraint("T1", DataTypeImpl::GetTensorType()) .TypeConstraint("T2", DataTypeImpl::GetTensorType()), diff --git a/onnxruntime/core/providers/cpu/nn/conv.cc b/onnxruntime/core/providers/cpu/nn/conv.cc index 51dfb143fb916..d10213f55d5d4 100644 --- a/onnxruntime/core/providers/cpu/nn/conv.cc +++ b/onnxruntime/core/providers/cpu/nn/conv.cc @@ -300,9 +300,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Conv); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( Conv, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + Conv); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + Conv, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), Conv); diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc index f0c1b0b409831..4e0b560e2ec3c 100644 --- a/onnxruntime/core/providers/cpu/nn/conv_transpose.cc +++ b/onnxruntime/core/providers/cpu/nn/conv_transpose.cc @@ -30,9 +30,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), ConvTranspose); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( ConvTranspose, 11, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + ConvTranspose); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + ConvTranspose, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), ConvTranspose); diff --git a/onnxruntime/core/providers/cpu/nn/dropout_op.cc b/onnxruntime/core/providers/cpu/nn/dropout_op.cc index 8b5ce947577bf..fb475ebc0e99f 100644 --- a/onnxruntime/core/providers/cpu/nn/dropout_op.cc +++ b/onnxruntime/core/providers/cpu/nn/dropout_op.cc @@ -19,12 +19,10 @@ namespace onnxruntime { Dropout); #define REGISTER_KERNEL_TYPED(OpName, VER, T1, T2) \ - ONNX_OPERATOR_TYPED_KERNEL_EX( \ + ONNX_CPU_OPERATOR_TYPED_KERNEL( \ OpName, \ - kOnnxDomain, \ VER, \ T1##_##T2, \ - kCpuExecutionProvider, \ KernelDefBuilder() \ .TypeConstraint("T", DataTypeImpl::GetTensorType()) \ .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ @@ -42,8 +40,14 @@ REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, float, double) REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, double, float) REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 12, 12, double, double) -REGISTER_KERNEL_TYPED(Dropout, 13, float, float) -REGISTER_KERNEL_TYPED(Dropout, 13, float, double) -REGISTER_KERNEL_TYPED(Dropout, 13, double, float) -REGISTER_KERNEL_TYPED(Dropout, 13, double, double) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, float, float) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, float, double) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, double, float) +REGISTER_KERNEL_VERSIONED_TYPED(Dropout, 13, 21, double, double) + +// Opset 22 supports BFloat16 +REGISTER_KERNEL_TYPED(Dropout, 22, float, float) +REGISTER_KERNEL_TYPED(Dropout, 22, float, double) +REGISTER_KERNEL_TYPED(Dropout, 22, double, float) +REGISTER_KERNEL_TYPED(Dropout, 22, double, double) } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/nn/instance_norm.cc b/onnxruntime/core/providers/cpu/nn/instance_norm.cc index 4eee9a8409cae..5d4e01808143f 100644 --- a/onnxruntime/core/providers/cpu/nn/instance_norm.cc +++ b/onnxruntime/core/providers/cpu/nn/instance_norm.cc @@ -8,9 +8,16 @@ using namespace ::onnxruntime::common; namespace onnxruntime { -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( InstanceNormalization, 6, + 21, + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), + InstanceNorm); + +ONNX_CPU_OPERATOR_KERNEL( + InstanceNormalization, + 22, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), InstanceNorm); diff --git a/onnxruntime/core/providers/cpu/nn/pool.cc b/onnxruntime/core/providers/cpu/nn/pool.cc index 9230398680a64..d6b9ed693432b 100644 --- a/onnxruntime/core/providers/cpu/nn/pool.cc +++ b/onnxruntime/core/providers/cpu/nn/pool.cc @@ -399,29 +399,26 @@ Status LpPoolV18::Compute(OpKernelContext* context) const { return Status::OK(); } -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 7, 9, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 10, 10, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(AveragePool, 11, 18, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_KERNEL(AveragePool, 19, - KernelDefBuilder() - .TypeConstraint( - "T", - DataTypeImpl::GetTensorType()), - AveragePoolV19); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 1, 7, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool>); - +#define REGISTER_KERNEL_VERSIONED(OpName, START_VER, END_VER, ...) \ + ONNX_CPU_OPERATOR_VERSIONED_KERNEL( \ + OpName, \ + START_VER, \ + END_VER, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), __VA_ARGS__); + +#define REGISTER_KERNEL(OpName, VER, ...) \ + ONNX_CPU_OPERATOR_KERNEL( \ + OpName, \ + VER, \ + KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), __VA_ARGS__); + +REGISTER_KERNEL_VERSIONED(AveragePool, 7, 9, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 10, 10, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 11, 18, Pool); +REGISTER_KERNEL_VERSIONED(AveragePool, 19, 21, AveragePoolV19); +REGISTER_KERNEL(AveragePool, 22, AveragePoolV19); + +REGISTER_KERNEL_VERSIONED(MaxPool, 1, 7, Pool>); ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 8, 11, KernelDefBuilder() .TypeConstraint( @@ -430,7 +427,14 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 8, 11, .TypeConstraint("I", DataTypeImpl::GetTensorType()), MaxPoolV8); -ONNX_CPU_OPERATOR_KERNEL(MaxPool, 12, +ONNX_CPU_OPERATOR_VERSIONED_KERNEL(MaxPool, 12, 21, + KernelDefBuilder() + .TypeConstraint( + "T", + BuildKernelDefConstraintsFromTypeList()) + .TypeConstraint("I", DataTypeImpl::GetTensorType()), + MaxPoolV8); +ONNX_CPU_OPERATOR_KERNEL(MaxPool, 22, KernelDefBuilder() .TypeConstraint( "T", @@ -438,29 +442,17 @@ ONNX_CPU_OPERATOR_KERNEL(MaxPool, 12, .TypeConstraint("I", DataTypeImpl::GetTensorType()), MaxPoolV8); -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 2, 10, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LpPool, 11, 17, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); - -ONNX_CPU_OPERATOR_KERNEL(LpPool, 18, - KernelDefBuilder() - .TypeConstraint( - "T", - DataTypeImpl::GetTensorType()), - LpPoolV18); +REGISTER_KERNEL_VERSIONED(LpPool, 2, 10, Pool); +REGISTER_KERNEL_VERSIONED(LpPool, 11, 17, Pool); +REGISTER_KERNEL_VERSIONED(LpPool, 18, 21, LpPoolV18); +REGISTER_KERNEL(LpPool, 22, LpPoolV18); -ONNX_CPU_OPERATOR_KERNEL(GlobalLpPool, 2, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); +REGISTER_KERNEL(GlobalLpPool, 2, Pool); -ONNX_CPU_OPERATOR_KERNEL(GlobalAveragePool, 1, - KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool); +REGISTER_KERNEL_VERSIONED(GlobalAveragePool, 1, 21, Pool); +REGISTER_KERNEL(GlobalAveragePool, 22, Pool); -ONNX_CPU_OPERATOR_KERNEL(GlobalMaxPool, 1, KernelDefBuilder().TypeConstraint("T", DataTypeImpl::GetTensorType()), - Pool>); +REGISTER_KERNEL_VERSIONED(GlobalMaxPool, 1, 21, Pool>); +REGISTER_KERNEL(GlobalMaxPool, 22, Pool>); } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc index b78c5236e6fab..c0171f7728ea8 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_gru.cc @@ -152,9 +152,18 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T1", DataTypeImpl::GetTensorType()), DeepCpuGruOp); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( GRU, 14, + 21, + KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + DeepCpuGruOp); + +ONNX_CPU_OPERATOR_KERNEL( + GRU, + 22, KernelDefBuilder().TypeConstraint("T", {DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType()}) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), diff --git a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc index 09bbf6c4c79e6..e95ad707cf2b0 100644 --- a/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc +++ b/onnxruntime/core/providers/cpu/rnn/deep_cpu_lstm.cc @@ -163,7 +163,14 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LSTM, 7, 13, .TypeConstraint("T1", DataTypeImpl::GetTensorType()), DeepCpuLstmOp); -ONNX_CPU_OPERATOR_KERNEL(LSTM, 14, +ONNX_CPU_OPERATOR_VERSIONED_KERNEL(LSTM, 14, 21, + KernelDefBuilder() + .TypeConstraint("T", {DataTypeImpl::GetTensorType(), + DataTypeImpl::GetTensorType()}) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + DeepCpuLstmOp); + +ONNX_CPU_OPERATOR_KERNEL(LSTM, 22, KernelDefBuilder() .TypeConstraint("T", {DataTypeImpl::GetTensorType(), DataTypeImpl::GetTensorType()}) diff --git a/onnxruntime/core/providers/cpu/rnn/rnn.cc b/onnxruntime/core/providers/cpu/rnn/rnn.cc index 82a689c830493..f061dfcf4827c 100644 --- a/onnxruntime/core/providers/cpu/rnn/rnn.cc +++ b/onnxruntime/core/providers/cpu/rnn/rnn.cc @@ -24,9 +24,17 @@ ONNX_CPU_OPERATOR_VERSIONED_KERNEL( .TypeConstraint("T1", DataTypeImpl::GetTensorType()), RNN); +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( + RNN, + 14, 21, + KernelDefBuilder() + .TypeConstraint("T", DataTypeImpl::GetTensorType()) + .TypeConstraint("T1", DataTypeImpl::GetTensorType()), + RNN); + ONNX_CPU_OPERATOR_KERNEL( RNN, - 14, + 22, KernelDefBuilder() .TypeConstraint("T", DataTypeImpl::GetTensorType()) .TypeConstraint("T1", DataTypeImpl::GetTensorType()), diff --git a/onnxruntime/core/providers/cpu/tensor/eye_like.cc b/onnxruntime/core/providers/cpu/tensor/eye_like.cc index ce7048fb8e76f..5972638a88fbd 100644 --- a/onnxruntime/core/providers/cpu/tensor/eye_like.cc +++ b/onnxruntime/core/providers/cpu/tensor/eye_like.cc @@ -18,9 +18,23 @@ ORT_SPECIFY_OP_KERNEL_ARG_DEFAULT_TYPES_ALL_OPSETS( using EnabledEyeLikeDataTypes = ORT_OP_KERNEL_ARG_ENABLED_TYPE_LIST_ALL_OPSETS( kCpuExecutionProvider, kOnnxDomain, EyeLike, Output, 0); -ONNX_CPU_OPERATOR_KERNEL( +ONNX_CPU_OPERATOR_VERSIONED_KERNEL( EyeLike, 9, + 21, + KernelDefBuilder() + .TypeConstraint( + "T1", + BuildKernelDefConstraintsFromTypeList()) + .TypeConstraint( + "T2", + BuildKernelDefConstraintsFromTypeList()), + EyeLike); + +// Opset 22 starts to support bfloat16 +ONNX_CPU_OPERATOR_KERNEL( + EyeLike, + 22, KernelDefBuilder() .TypeConstraint( "T1", diff --git a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc index a83ba378d7f1e..d673fcce223e6 100644 --- a/onnxruntime/core/providers/cpu/tensor/grid_sample.cc +++ b/onnxruntime/core/providers/cpu/tensor/grid_sample.cc @@ -11,23 +11,29 @@ namespace onnxruntime { -#define REGISTER_KERNEL_TYPED(T) \ - ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 16, 19, T, kCpuExecutionProvider, \ - KernelDefBuilder() \ - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ - GridSample); - -#define REGISTER_KERNEL_TYPED_20(T) \ - ONNX_OPERATOR_TYPED_KERNEL_EX(GridSample, kOnnxDomain, 20, T, kCpuExecutionProvider, \ - KernelDefBuilder() \ - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ - GridSample); - -REGISTER_KERNEL_TYPED(float) -REGISTER_KERNEL_TYPED_20(float) -REGISTER_KERNEL_TYPED_20(double) +#define REGISTER_VERSIONED_KERNEL_TYPED(START_VER, END_VER, T) \ + ONNX_CPU_OPERATOR_VERSIONED_TYPED_KERNEL(GridSample, START_VER, END_VER, T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ + GridSample); + +#define REGISTER_KERNEL_TYPED(VER, T) \ + ONNX_CPU_OPERATOR_TYPED_KERNEL(GridSample, VER, T, \ + KernelDefBuilder() \ + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) \ + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), \ + GridSample); + +REGISTER_VERSIONED_KERNEL_TYPED(16, 19, float) +REGISTER_VERSIONED_KERNEL_TYPED(16, 19, double) + +REGISTER_VERSIONED_KERNEL_TYPED(20, 21, float) +REGISTER_VERSIONED_KERNEL_TYPED(20, 21, double) + +// Opset 22 supports BFloat16 +REGISTER_KERNEL_TYPED(22, float) +REGISTER_KERNEL_TYPED(22, double) // Restore normalized location to actual image location // When align_corners is true: diff --git a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc index ad49560f526e0..db5ce1742e37c 100644 --- a/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc +++ b/onnxruntime/test/contrib_ops/fused_matmul_op_test.cc @@ -222,10 +222,9 @@ TEST(FusedMatMulOpTest, FloatTypeNoTranspose) { } #if defined(USE_CUDA) || defined(USE_ROCM) // double support only implemented in CUDA/ROCM kernel -// CUDAExecutionProvider cannot be used with this model due to its ONNX opset not being supported by the layout transformer. -// TEST(FusedMatMulOpTest, DoubleTypeNoTranspose) { -// RunFusedMatMulTest("FusedMatMul", 1); -// } +TEST(FusedMatMulOpTest, DoubleTypeNoTranspose) { + RunFusedMatMulTest("FusedMatMul", 1); +} #endif TEST(FusedMatMulOpTest, FloatTypeTransposeA) { diff --git a/onnxruntime/test/onnx/TestCase.cc b/onnxruntime/test/onnx/TestCase.cc index 51653f8c6ddac..b9b69fdc74b4d 100644 --- a/onnxruntime/test/onnx/TestCase.cc +++ b/onnxruntime/test/onnx/TestCase.cc @@ -1396,10 +1396,10 @@ std::unique_ptr> GetBrokenTests(const std::string& provider broken_tests->insert({"resize_upsample_sizes_nearest", "result differs"}); broken_tests->insert({"resize_upsample_sizes_nearest_axes_2_3", "result differs"}); broken_tests->insert({"resize_upsample_sizes_nearest_axes_3_2", "result differs"}); - broken_tests->insert({"convtranspose_group_2", "group attribute (new of opset(22)) not supported"}); - broken_tests->insert({"convtranspose_group_2_image_3", "group attribute (new of opset(22)) not supported"}); broken_tests->insert({"resize_upsample_sizes_nearest_not_larger", "output=Y:expected 1 (3f800000), got 4 (40800000), diff: 3, tol=0.002 idx=24. 13 of 49 differ. CPU test passed."}); + broken_tests->insert({"convtranspose_group_2", "Segmentation fault (core dumped). CPU test passed."}); + broken_tests->insert({"convtranspose_group_2_image_3", "Segmentation fault (core dumped). CPU test passed."}); } #ifdef DISABLE_CONTRIB_OPS diff --git a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc index 9f21b64681739..85ca7c1ed328e 100644 --- a/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc +++ b/onnxruntime/test/providers/xnnpack/xnnpack_basic_test.cc @@ -266,7 +266,7 @@ TEST(XnnpackEP, TestQDQConvS8S8_per_channel) { RunModelTestWithPath(ort_model_path, "xnnpack_qdq_test_graph_conv_s8s8_perchannel", graph_verify, 0.2f); } -TEST(XnnpackEP, DISABLED_TestAveragePool) { // [ONNXRuntimeError] : 9 : NOT_IMPLEMENTED : Could not find an implementation for AveragePool(19) node with name 'node' +TEST(XnnpackEP, TestAveragePool) { const std::vector input_shape = {1, 2, 3, 3}; auto modelBuilder = [&input_shape](ModelTestBuilder& builder) { auto* input_arg = builder.MakeInput(input_shape, -1.f, 1.f); @@ -295,7 +295,7 @@ TEST(XnnpackEP, DISABLED_TestQDQAveragePool) { // [ONNXRuntimeError] : 9 : NOT }); } -TEST(XnnpackEP, DISABLED_TestMaxPool) { // NOT_IMPLEMENTED : Could not find an implementation for MaxPool(22) node with name 'node' +TEST(XnnpackEP, TestMaxPool) { const std::vector input_shape = {1, 2, 13, 13}; auto modelBuilder = [&input_shape](ModelTestBuilder& builder) { auto* input_arg = builder.MakeInput(input_shape, -1.f, 1.f); @@ -395,7 +395,7 @@ TEST(XnnpackEP, TestConvTranspose) { RunModelTestWithPath(ort_model_path, "test_conv_follow_convtrans", nullptr); } -TEST(XnnpackEP, DISABLED_TestConvTranspose_With_Outputpadding) { // NOT_IMPLEMENTED : Could not find an implementation for ConvTranspose(22) node with name 'node' +TEST(XnnpackEP, TestConvTranspose_With_Outputpadding) { const std::vector input_shape = {1, 4, 15, 15}; auto modelBuilder = [&input_shape](ModelTestBuilder& builder) { auto* input_arg = builder.MakeInput(input_shape, -127.f, 127.f); @@ -415,7 +415,7 @@ TEST(XnnpackEP, DISABLED_TestConvTranspose_With_Outputpadding) { // NOT_IMPLEME }); } -TEST(XnnpackEP, DISABLED_TestConvTranspose_With_OutputShape) { // NOT_IMPLEMENTED : Could not find an implementation for ConvTranspose(22) node with name 'node' +TEST(XnnpackEP, TestConvTranspose_With_OutputShape) { const std::vector input_shape = {1, 4, 15, 15}; auto modelBuilder = [&input_shape](ModelTestBuilder& builder) { auto* input_arg = builder.MakeInput(input_shape, -127.f, 127.f); diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index 0540fb3912e81..e54e83d969622 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -323,46 +323,7 @@ "^test_dequantizelinear_int4", "^test_dequantizelinear_uint4", "^test_quantizelinear_int4", - "^test_quantizelinear_uint4", - // onnx 1.17.0 op tests: skip until implemented in ORT - "^test_acos*", // Could not find an implementation for Acos(22) - "^test_acosh*", // Could not find an implementation for Acosh(22) - "^test_asin*", // Could not find an implementation for Asin(22) - "^test_asinh*", // Could not find an implementation for Asinh(22) - "^test_atan*", // Could not find an implementation for Atan(22) - "^test_atanh*", // Could not find an implementation for Atanh(22) - "^test_basic_conv_with_padding*", // Could not find an implementation for Conv(22) - "^test_basic_conv_without_padding*", // Could not find an implementation for Conv(22) - "^test_conv*", // Could not find an implementation for Conv(22) - "^test_convtranspose*", // Could not find an implementation for ConvTranspose(22) - "^test_cos*", // Could not find an implementation for Cos(22) - "^test_cosh*", // Could not find an implementation for Cosh(22) - "^test_det*", // Could not find an implementation for Det(22) - "^test_dropout*", // Could not find an implementation for Dropout(22) - "^test_elu*", // Could not find an implementation for Elu(22) - "^test_eyelike*", // Could not find an implementation for EyeLike(22) - "^test_globalaveragepool*", // Could not find an implementation for GlobalAveragePool(22) - "^test_globalmaxpool*", // Could not find an implementation for GlobalMaxPool(22) - "^test_gridsample*", // Could not find an implementation for GridSample(22) - "^test_gru*", // Could not find an implementation for GRU(22) - "^test_hardsigmoid*", // Could not find an implementation for HardSigmoid(22) - "^test_hardswish*", // Could not find an implementation for HardSigmoid(22) - "^test_instancenorm*", // Could not find an implementation for InstanceNormalization(22) - "^test_lppool*", // Could not find an implementation for LpPool(22) - "^test_lstm*", // Could not find an implementation for LSTM(22) - "^test_maxpool*", // Could not find an implementation for MaxPool(22) - "^test_maxunpool*", // Could not find an implementation for MaxUnpool(22) - "^test_mish*", // Could not find an implementation for Softplus(22) - "^test_rnn*", // Could not find an implementation for RNN(22) - "^test_round*", // Could not find an implementation for Round(22) - "^test_selu*", // Could not find an implementation for Selu(22) - "^test_simple_rnn*", // Could not find an implementation for RNN(22) - "^test_sin*", // Could not find an implementation for Sin(22) - "^test_sinh*", // Could not find an implementation for Sinh(22) - "^test_softplus*", // Could not find an implementation for Softplus(22) - "^test_softsign*", // Could not find an implementation for Softsign(22) - "^test_tan*", // Could not find an implementation for Tan(22) - "^test_thresholdedrelu*" // Could not find an implementation for ThresholdedRelu(22) + "^test_quantizelinear_uint4" ], "current_failing_tests_x86": [ "^test_vgg19",