diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md
index 507c6722bc349..67e0c9f8620d8 100644
--- a/docs/OperatorKernels.md
+++ b/docs/OperatorKernels.md
@@ -697,7 +697,8 @@ Do not modify directly.*
 |DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16)|
 |||[11, 12]|**T** = tensor(double), tensor(float), tensor(float16)|
 |||[1, 10]|**T** = tensor(double), tensor(float), tensor(float16)|
-|DequantizeLinear|*in* x:**T**<br> *in* x_scale:**tensor(float)**<br> *in* x_zero_point:**T**<br> *out* y:**tensor(float)**<br><br>or<br><br>*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T2**<br><br>or<br><br>*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T3**|21+|**T1** = tensor(float8e4m3fn), tensor(float8e5m2), tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
+|DequantizeLinear|*in* x:**T**<br> *in* x_scale:**tensor(float)**<br> *in* x_zero_point:**T**<br> *out* y:**tensor(float)**<br><br>or<br><br>*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T2**<br><br>or<br><br>*in* x:**T1**<br> *in* x_scale:**T2**<br> *in* x_zero_point:**T1**<br> *out* y:**T3**|25+|**T1** = tensor(float8e4m3fn), tensor(float8e5m2), tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
+|||[21, 24]|**T1** = tensor(float8e4m3fn), tensor(float8e5m2), tensor(int4), tensor(int8), tensor(uint4), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
 |||[19, 20]|**T1** = tensor(float8e4m3fn), tensor(float8e5m2), tensor(int8), tensor(uint8)<br/> **T2** = tensor(float), tensor(float16)|
 |||[13, 18]|**T** = tensor(int8), tensor(uint8)|
 |||[10, 12]|**T** = tensor(int8), tensor(uint8)|
diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
index 60ac16018f539..29071e8230f5c 100755
--- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
+++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -1521,19 +1521,19 @@ class ONNX_OPERATOR_VERSIONED_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kO
// Opset 21.
// TODO(fajin): support other quantized types
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, int8_t, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, MLFloat16, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, int8_t, MLFloat16, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, UInt4x2, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Int4x2, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, UInt4x2, MLFloat16, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Int4x2, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, uint8_t, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, int8_t, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, uint8_t, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, int8_t, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, UInt4x2, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Int4x2, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, UInt4x2, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Int4x2, MLFloat16, DequantizeLinear);
#if !defined(DISABLE_FLOAT8_TYPES)
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E4M3FN, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E5M2, float, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E4M3FN, MLFloat16, DequantizeLinear);
-class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E5M2, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E4M3FN, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E5M2, float, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E4M3FN, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E5M2, MLFloat16, DequantizeLinear);
#endif
class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, float, QuantizeLinear);
@@ -1639,6 +1639,22 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 24, MLFloat16, Attention);
class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 24, BFloat16, Attention);
+// Opset 25.
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, uint8_t, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, int8_t, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, uint8_t, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, int8_t, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, UInt4x2, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Int4x2, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, UInt4x2, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Int4x2, MLFloat16, DequantizeLinear);
+#if !defined(DISABLE_FLOAT8_TYPES)
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E4M3FN, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E5M2, float, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E4M3FN, MLFloat16, DequantizeLinear);
+class ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E5M2, MLFloat16, DequantizeLinear);
+#endif
+
#endif
template <>
@@ -2606,19 +2622,19 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
// Opset 21
// TODO(fajin): support other quantized types
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, int8_t, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, MLFloat16, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, int8_t, MLFloat16, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, UInt4x2, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Int4x2, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, UInt4x2, MLFloat16, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Int4x2, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, uint8_t, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, int8_t, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, uint8_t, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, int8_t, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, UInt4x2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Int4x2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, UInt4x2, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Int4x2, MLFloat16, DequantizeLinear)>,
 #if !defined(DISABLE_FLOAT8_TYPES)
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E4M3FN, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E5M2, float, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E4M3FN, MLFloat16, DequantizeLinear)>,
-      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, Float8E5M2, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E4M3FN, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E5M2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E4M3FN, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, 24, Float8E5M2, MLFloat16, DequantizeLinear)>,
 #endif
       BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, uint8_t, float, QuantizeLinear)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 21, int8_t, float, QuantizeLinear)>,
@@ -2721,6 +2737,22 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 24, float, Attention)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 24, MLFloat16, Attention)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 24, BFloat16, Attention)>,
+
+      // Opset 25
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, uint8_t, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, int8_t, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, uint8_t, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, int8_t, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, UInt4x2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Int4x2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, UInt4x2, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Int4x2, MLFloat16, DequantizeLinear)>,
+#if !defined(DISABLE_FLOAT8_TYPES)
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E4M3FN, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E5M2, float, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E4M3FN, MLFloat16, DequantizeLinear)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TWO_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 25, Float8E5M2, MLFloat16, DequantizeLinear)>,
+#endif
#endif
};
diff --git a/onnxruntime/core/providers/cuda/tensor/quantize_linear.cc b/onnxruntime/core/providers/cuda/tensor/quantize_linear.cc
index 6a5dbc433fb1e..3c0583057fa29 100644
--- a/onnxruntime/core/providers/cuda/tensor/quantize_linear.cc
+++ b/onnxruntime/core/providers/cuda/tensor/quantize_linear.cc
@@ -590,11 +590,38 @@ REGISTER_DQ_KERNEL_TWO_TYPED_19_20(Float8E4M3FN, MLFloat16)
REGISTER_DQ_KERNEL_TWO_TYPED_19_20(Float8E5M2, MLFloat16)
#endif
-#define REGISTER_DQ_KERNEL_TWO_TYPED_21(T, U) \
+#define REGISTER_DQ_KERNEL_TWO_TYPED_21_24(T, U) \
+ ONNX_OPERATOR_VERSIONED_TWO_TYPED_KERNEL_EX( \
+ DequantizeLinear, \
+ kOnnxDomain, \
+ 21, 24, \
+ T, U, \
+ kCudaExecutionProvider, \
+ (*KernelDefBuilder::Create()) \
+          .TypeConstraint("T1", DataTypeImpl::GetTensorType<T>())      \
+          .TypeConstraint("T2", DataTypeImpl::GetTensorType<U>()),     \
+ DequantizeLinear);
+
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(uint8_t, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(int8_t, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(uint8_t, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(int8_t, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(UInt4x2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Int4x2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(UInt4x2, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Int4x2, MLFloat16)
+#if !defined(DISABLE_FLOAT8_TYPES)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Float8E4M3FN, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Float8E5M2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Float8E4M3FN, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_21_24(Float8E5M2, MLFloat16)
+#endif
+
+#define REGISTER_DQ_KERNEL_TWO_TYPED_25(T, U) \
ONNX_OPERATOR_TWO_TYPED_KERNEL_EX( \
DequantizeLinear, \
kOnnxDomain, \
- 21, \
+ 25, \
T, U, \
kCudaExecutionProvider, \
(*KernelDefBuilder::Create()) \
@@ -602,19 +629,19 @@ REGISTER_DQ_KERNEL_TWO_TYPED_19_20(Float8E5M2, MLFloat16)
           .TypeConstraint("T2", DataTypeImpl::GetTensorType<U>()),     \
DequantizeLinear);
-REGISTER_DQ_KERNEL_TWO_TYPED_21(uint8_t, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(int8_t, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(uint8_t, MLFloat16)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(int8_t, MLFloat16)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(UInt4x2, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Int4x2, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(UInt4x2, MLFloat16)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Int4x2, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(uint8_t, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(int8_t, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(uint8_t, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(int8_t, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(UInt4x2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Int4x2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(UInt4x2, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Int4x2, MLFloat16)
#if !defined(DISABLE_FLOAT8_TYPES)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Float8E4M3FN, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Float8E5M2, float)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Float8E4M3FN, MLFloat16)
-REGISTER_DQ_KERNEL_TWO_TYPED_21(Float8E5M2, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Float8E4M3FN, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Float8E5M2, float)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Float8E4M3FN, MLFloat16)
+REGISTER_DQ_KERNEL_TWO_TYPED_25(Float8E5M2, MLFloat16)
#endif
// specialize QuantizeLinear::ComputeInternal and DequantizeLinear::ComputeInternal