Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ set(provider_excluded_files
"gpu_data_transfer.h"
"integer_gemm.cc"
"tunable/*"
"cuda_nhwc_kernels.cc"
"cuda_nhwc_kernels.h"
)

set(training_ops_excluded_files
Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/rocm/miopen_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ Status MiopenTensor::CreateTensorIfNeeded() {
return Status::OK();
}

Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType) {
Status MiopenTensor::Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc) {
if (is_nhwc) {
return ORT_MAKE_STATUS(ONNXRUNTIME, NOT_IMPLEMENTED,
"NHWC Tensor usage is not supported in AMD builds for now");
}

ORT_RETURN_IF_ERROR(CreateTensorIfNeeded());

int rank = gsl::narrow_cast<int>(input_dims.size());
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/rocm/miopen_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class MiopenTensor final {
~MiopenTensor();
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(MiopenTensor);

Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType);
Status Set(gsl::span<const int64_t> input_dims, miopenDataType_t dataType, bool is_nhwc = false);
Status Set(miopenDataType_t dataType, miopenTensorLayout_t tensor_layout, int n, int c, int h, int w);
Status Set(const MiopenTensor& x_desc, miopenBatchNormMode_t mode);

Expand Down
12 changes: 6 additions & 6 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,28 @@ namespace rocm {
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>); \
ConvTranspose<T, false>); \
ONNX_OPERATOR_TYPED_KERNEL_EX( \
ConvTranspose, \
kOnnxDomain, \
11, \
T, \
kRocmExecutionProvider, \
(*KernelDefBuilder::Create()).TypeConstraint("T", DataTypeImpl::GetTensorType<T>()), \
ConvTranspose<T>);
ConvTranspose<T, false>);

REGISTER_KERNEL_TYPED(float)
// not yet supported in MIOpen
// REGISTER_KERNEL_TYPED(double)
REGISTER_KERNEL_TYPED(MLFloat16)

template <typename T>
Status ConvTranspose<T>::ComputeInternal(OpKernelContext* context) const {
template <typename T, bool NHWC>
// Kernel entry point for the ROCm ConvTranspose op (registered above for
// opsets 1 and 11 with NHWC=false). Delegates all work to DoConvTranspose
// with dynamic_padding=false — presumably the pads come from static node
// attributes here, with `true` reserved for a dynamic-pads variant of the
// op; TODO(review) confirm against the CUDA provider's registration.
Status ConvTranspose<T, NHWC>::ComputeInternal(OpKernelContext* context) const {
  return DoConvTranspose(context, false);
}

template <typename T>
Status ConvTranspose<T>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
template <typename T, bool NHWC>
Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const {
typedef typename ToHipType<T>::MappedType HipT;

const Tensor* X = context->Input<Tensor>(0);
Expand Down
6 changes: 4 additions & 2 deletions onnxruntime/core/providers/rocm/nn/conv_transpose.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
namespace onnxruntime {
namespace rocm {

template <typename T>
template <typename T, bool NHWC>
class ConvTranspose : public RocmKernel {
public:
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info){};
ConvTranspose(const OpKernelInfo& info) : RocmKernel(info), conv_transpose_attrs_(info) {
static_assert(!NHWC, "AMD builds don't support usage of NHWC ops");
};
Status ComputeInternal(OpKernelContext* context) const override;
Status DoConvTranspose(OpKernelContext* context, bool dynamic_padding) const;

Expand Down
2 changes: 2 additions & 0 deletions tools/ci_build/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ def convert_arg_line_to_args(self, arg_line):
"--cudnn_home is not specified.",
)
parser.add_argument("--enable_cuda_line_info", action="store_true", help="Enable CUDA line info.")
parser.add_argument("--enable_cuda_nhwc_ops", action="store_true", help="Enable CUDA NHWC ops in build.")

# Python bindings
parser.add_argument("--enable_pybind", action="store_true", help="Enable Python Bindings.")
Expand Down Expand Up @@ -1025,6 +1026,7 @@ def generate_build_tree(
"-Donnxruntime_USE_MPI=" + ("ON" if args.use_mpi else "OFF"),
"-Donnxruntime_ENABLE_MEMORY_PROFILE=" + ("ON" if args.enable_memory_profile else "OFF"),
"-Donnxruntime_ENABLE_CUDA_LINE_NUMBER_INFO=" + ("ON" if args.enable_cuda_line_info else "OFF"),
"-Donnxruntime_USE_CUDA_NHWC_OPS=" + ("ON" if args.enable_cuda_nhwc_ops else "OFF"),
"-Donnxruntime_BUILD_WEBASSEMBLY_STATIC_LIB=" + ("ON" if args.build_wasm_static_lib else "OFF"),
"-Donnxruntime_ENABLE_WEBASSEMBLY_EXCEPTION_CATCHING="
+ ("OFF" if args.disable_wasm_exception_catching else "ON"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ jobs:
--parallel \
--build_wheel \
--enable_onnx_tests --use_cuda --cuda_version=${{variables.common_cuda_version}} --cuda_home=/usr/local/cuda-${{variables.common_cuda_version}} --cudnn_home=/usr/local/cuda-${{variables.common_cuda_version}} \
--enable_cuda_profiling \
--enable_cuda_profiling --enable_cuda_nhwc_ops \
--enable_pybind --build_java \
--use_cache \
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75; \
Expand Down