diff --git a/onnxruntime/core/providers/webgpu/math/softmax.cc b/onnxruntime/core/providers/webgpu/math/softmax.cc
index 6a6cfe154b91c..178ca0b9e0515 100644
--- a/onnxruntime/core/providers/webgpu/math/softmax.cc
+++ b/onnxruntime/core/providers/webgpu/math/softmax.cc
@@ -156,7 +156,9 @@ Status Softmax::ComputeInternal(ComputeContext& context) const {

   // normalize axis
   size_t axis = static_cast<size_t>(HandleNegativeAxis(axis_, input_rank));
-  bool is_transpose_required = axis < input_rank - 1;
+  // In opsets lower than 13, the `axis` attribute describes the axis of the inputs when coerced to 2D;
+  // the 0th axis most likely describes the batch size, so no transpose is required for old opset versions.
+  bool is_transpose_required = axis < input_rank - 1 && opset_ >= 13;

   TensorShape transposed_input_shape;
   Tensor transposed_input_tensor;
@@ -179,7 +181,9 @@ Status Softmax::ComputeInternal(ComputeContext& context) const {
     intermediate_output = context.CreateGPUTensor(output_tensor->DataType(), transposed_input_shape);
   }

-  const int64_t cols = is_transpose_required ? transposed_input_shape[input_rank - 1] : input_shape[input_rank - 1];
+  // In opsets lower than 13, the `axis` attribute separates the input tensor's dimensions into two parts:
+  // the leading part is treated as the batch size, and Softmax is performed over the trailing part.
+  const int64_t cols = is_transpose_required ? transposed_input_shape[input_rank - 1] : (opset_ >= 13 ? input_shape[input_rank - 1] : input_shape.SizeFromDimension(axis));
   const int64_t rows = input_shape.Size() / cols;
   const int64_t components = GetMaxComponents(cols);
   const auto packed_cols = cols / components;
diff --git a/onnxruntime/core/providers/webgpu/math/softmax.h b/onnxruntime/core/providers/webgpu/math/softmax.h
index cc97611dcb4bc..532a56ff0be41 100644
--- a/onnxruntime/core/providers/webgpu/math/softmax.h
+++ b/onnxruntime/core/providers/webgpu/math/softmax.h
@@ -14,7 +14,7 @@ namespace webgpu {
 class Softmax final : public WebGpuKernel {
  public:
   Softmax(const OpKernelInfo& info) : WebGpuKernel{info} {
-    int opset_ = info.node().SinceVersion();
+    opset_ = info.node().SinceVersion();

     int64_t axis;
     Status status = info.GetAttr("axis", &axis);
@@ -33,6 +33,7 @@ class Softmax final : public WebGpuKernel {

  private:
   int64_t axis_;
+  int opset_;
 };

 class SoftmaxProgram final : public Program<SoftmaxProgram> {
diff --git a/onnxruntime/test/providers/cpu/math/softmax_test.cc b/onnxruntime/test/providers/cpu/math/softmax_test.cc
index 03f5fdaab2780..1c6375ebdb0b1 100644
--- a/onnxruntime/test/providers/cpu/math/softmax_test.cc
+++ b/onnxruntime/test/providers/cpu/math/softmax_test.cc
@@ -422,8 +422,7 @@ TEST(SoftmaxOperator, GH15949_regression_test) {
                   {0.00032932f, 0.01798029f, 0.9816904f});

   // disable TRT as it does not support axis=0 as used by the model
-  // TODO: Fix the Softmax operator of WebGPU EP.
-  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kWebGpuExecutionProvider});
+  tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }

 }  // namespace test
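
Note (not part of the patch): below is a minimal standalone C++ sketch of the two `axis` interpretations the patch distinguishes. The `SizeFromDimension` helper here is a local stand-in assumed to behave like ONNX Runtime's `TensorShape::SizeFromDimension` (product of dimensions from `axis` onward); the shape and axis values are hypothetical, chosen only to illustrate the rows/cols computation in `ComputeInternal` above.

    // Sketch only: mirrors the rows/cols computation for opset >= 13 vs opset < 13.
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Stand-in for TensorShape::SizeFromDimension: product of dims [axis, rank).
    int64_t SizeFromDimension(const std::vector<int64_t>& shape, size_t axis) {
      return std::accumulate(shape.begin() + axis, shape.end(), int64_t{1},
                             std::multiplies<int64_t>());
    }

    int main() {
      const std::vector<int64_t> shape{2, 3, 4, 5};  // hypothetical input shape
      const size_t axis = 1;                         // hypothetical axis attribute
      const int64_t total = SizeFromDimension(shape, 0);  // 120 elements

      // Opset >= 13: Softmax reduces along exactly one axis. If that axis is not
      // the last dimension, the kernel transposes first, so cols is always the
      // last dimension of the (possibly transposed) shape.
      const int64_t cols_v13 = shape.back();  // 5, so rows = 24

      // Opset < 13: the input is coerced to 2D as
      // [product of dims before axis, product of dims from axis onward], and
      // Softmax runs over the flattened trailing block; no transpose is needed.
      const int64_t cols_pre13 = SizeFromDimension(shape, axis);  // 60, so rows = 2

      std::cout << "opset >= 13: rows=" << total / cols_v13
                << ", cols=" << cols_v13 << "\n";
      std::cout << "opset <  13: rows=" << total / cols_pre13
                << ", cols=" << cols_pre13 << "\n";
      return 0;
    }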