
Kernel operator tuning #8686

Merged: 5 commits, Nov 21, 2017
24 changes: 21 additions & 3 deletions CMakeLists.txt
@@ -35,6 +35,7 @@ mxnet_option(USE_LAPACK "Build with lapack support" ON IF NOT MSVC)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLML_MKL "Use MKLML variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
mxnet_option(USE_MKL_EXPERIMENTAL "Use experimental MKL (if MKL enabled and found)" OFF)
mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON AND NOT MSVC)
mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support (if found)" ON)
mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON)
mxnet_option(USE_PROFILER "Build with Profiler support" OFF)
@@ -143,6 +144,8 @@ if(USE_MKL_IF_AVAILABLE)
if(NOT MSVC)
list(APPEND mxnet_LINKER_LIBS dl)
endif()
# If using MKL, use the Intel OMP libraries
list(APPEND mxnet_LINKER_LIBS iomp5)
if(USE_MKL_EXPERIMENTAL)
add_definitions(-DMKL_EXPERIMENTAL=1)
else()
@@ -260,11 +263,22 @@ endif()
# ---[ OpenMP
if(USE_OPENMP)
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/openmp/CMakeLists.txt)
# Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp
set(OPENMP_STANDALONE_BUILD TRUE)
set(LIBOMP_ENABLE_SHARED FALSE)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/openmp)
list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5)
list(APPEND mxnet_LINKER_LIBS omp)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
else()
if(OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
endif()
elseif(UNIX)
list(APPEND mxnet_LINKER_LIBS pthread)
@@ -353,6 +367,10 @@ if(USE_PLUGINS_WARPCTC)
list(APPEND CUDA ${PLUGINS_CUSRC})
endif()

if(USE_OPERATOR_TUNING)
add_definitions(-DMXNET_USE_OPERATOR_TUNING=1)
endif()

if(USE_PLUGIN_CAFFE)
if(NOT USE_CUDA)
set(CPU_ONLY ON)
4 changes: 4 additions & 0 deletions Makefile
@@ -131,6 +131,10 @@ ifeq ($(USE_MKL2017), 1)
LDFLAGS += -liomp5
endif

ifeq ($(USE_OPERATOR_TUNING), 1)
CFLAGS += -DMXNET_USE_OPERATOR_TUNING=1
endif

# verify existence of separate lapack library when using blas/openblas/atlas
# switch off lapack support in case it can't be found
# issue covered with this
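Both the CMake and Makefile changes above inject the same MXNET_USE_OPERATOR_TUNING=1 compile definition; the code guarded by it lives elsewhere in the tree. As a minimal sketch only (not MXNet source; the file name and messages are made up), this is the kind of compile-time gate such a definition drives:

// tuning_gate_sketch.cc -- hedged illustration, not part of this PR.
// A -DMXNET_USE_OPERATOR_TUNING=1 definition typically selects between a
// tuned code path and a plain fallback at compile time.
#include <cstdio>

#ifndef MXNET_USE_OPERATOR_TUNING
#define MXNET_USE_OPERATOR_TUNING 0   // default when the build flag is off
#endif

int main() {
#if MXNET_USE_OPERATOR_TUNING
  std::printf("built with operator tuning enabled\n");
#else
  std::printf("built without operator tuning\n");
#endif
  return 0;
}

Building this sketch with and without -DMXNET_USE_OPERATOR_TUNING=1 flips which branch is compiled, which mirrors what the USE_OPERATOR_TUNING option toggles in the real build.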
6 changes: 6 additions & 0 deletions make/config.mk
@@ -153,6 +153,12 @@ LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
# sudo apt-get install -y libcurl4-openssl-dev
USE_S3 = 0

#----------------------------
# performance settings
#----------------------------
# Use operator tuning
USE_OPERATOR_TUNING = 1

# Use gperftools if found
USE_GPERFTOOLS = 1

94 changes: 69 additions & 25 deletions src/operator/mshadow_op.h
@@ -30,6 +30,7 @@
#include "math.h"
#include "math_functions-inl.h"
#include "special_functions-inl.h"
#include "./mxnet_op.h"

#ifdef __CUDACC__
#include <cuda_fp16.h>
@@ -39,6 +40,24 @@ namespace mxnet {
namespace op {
namespace mshadow_op {

/*!
 * \brief Use the 'MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD' macro outside of the mshadow_op namespace.
 * See mxnet_op.h for a description of 'MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD'.
 *
 * \note An entry for the operator must also be added in operator_tune.cc, which will register it
 * for auto-tuning and also hold its workload weight.
 */
#define MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(__op$) \
} MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD(mshadow_op::__op$) namespace mshadow_op { // NOLINT(*)

/*!
 * \brief Use the 'MXNET_TUNABLE_MSHADOW_OP_BACKWARD' macro outside of the mshadow_op namespace.
 * See mxnet_op.h for a description of 'MXNET_TUNABLE_MSHADOW_OP_BACKWARD'.
 *
 * \note An entry for the operator must also be added in operator_tune.cc, which will register it
 * for auto-tuning and also hold its workload weight.
 */
#define MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(__op$) \
} MXNET_TUNABLE_MSHADOW_OP_BACKWARD(mshadow_op::__op$) namespace mshadow_op { // NOLINT(*)
#ifdef __CUDA_ARCH__
__constant__ const float PI = 3.14159265358979323846;
#else
@@ -49,36 +68,41 @@ using std::enable_if;
using std::is_unsigned;

#define MXNET_UNARY_MATH_OP(name, expr) \
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a) { \
return DType(expr); \
} \
}
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a) { \
return DType(expr); \
} \
}; \
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(name)


#define MXNET_UNARY_MATH_OP_NC(name, expr) \
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a) { \
return (expr); \
} \
}
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a) { \
return (expr); \
} \
}; \
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(name)

#define MXNET_BINARY_MATH_OP(name, expr) \
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a, DType b) { \
return DType(expr); \
} \
}
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a, DType b) { \
return DType(expr); \
} \
}; \
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(name)

#define MXNET_BINARY_MATH_OP_NC(name, expr) \
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a, DType b) { \
return (expr); \
} \
}
struct name { \
template<typename DType> \
MSHADOW_XINLINE static DType Map(DType a, DType b) { \
return (expr); \
} \
}; \
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(name)

#define MXNET_SIMPLE_UNARY_MATH_OP(name) MXNET_UNARY_MATH_OP(name, math::name(a))

@@ -134,6 +158,7 @@ struct softrelu {
}
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(softrelu)

MXNET_UNARY_MATH_OP(softrelu_grad, -math::expm1(-a));

@@ -154,6 +179,7 @@ struct log10_grad {
return DType(0.4342944819f / static_cast<float>(a));
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(log10_grad)

template<>
MSHADOW_XINLINE double log10_grad::Map<double>(double a) {
@@ -169,6 +195,7 @@ struct log2_grad {
return DType(1.442695041f / static_cast<float>(a));
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(log2_grad)

template<>
MSHADOW_XINLINE double log2_grad::Map<double>(double a) {
@@ -263,6 +290,7 @@ struct sign {
return DType(0);
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(sign)

MXNET_UNARY_MATH_OP_NC(sign_grad, DType(0));

@@ -333,6 +361,7 @@ struct rint {
return DType((af - floor) <= (ceil - af) ? floor : ceil);
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(rint)

/*! \brief used to round number to integer nearest to 0 */
struct fix {
@@ -343,6 +372,7 @@ struct fix {
return DType((floor > 0 ? floor : -floor) < (ceil > 0 ? ceil : -ceil) ? floor : ceil);
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(fix)

/*! \brief used for generate gradient of MAE loss*/
MXNET_BINARY_MATH_OP_NC(minus_sign, a - b > DType(0) ? DType(1) : -DType(1));
@@ -405,6 +435,7 @@ struct mod {
}
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(mod)

template<>
MSHADOW_XINLINE mshadow::half::half2_t mod::Map<mshadow::half::half2_t>
@@ -419,6 +450,8 @@ struct mod_grad {
return DType(0);
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(mod_grad)

template<>
MSHADOW_XINLINE double mod_grad::Map<double>(double a, double b) {
return 1.0;
@@ -454,6 +487,8 @@ struct mod_rgrad {
return DType(0);
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(mod_rgrad)

template<>
MSHADOW_XINLINE double mod_rgrad::Map<double>(double a, double b) {
return -::floor(a/b);
@@ -517,6 +552,7 @@ struct rmod {
}
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(rmod)

template<>
MSHADOW_XINLINE mshadow::half::half2_t rmod::Map<mshadow::half::half2_t>
@@ -531,6 +567,8 @@ struct rmod_grad {
return DType(0);
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(rmod_grad)

template<>
MSHADOW_XINLINE double rmod_grad::Map<double>(double a, double b) {
return -::floor(b/a);
@@ -572,6 +610,7 @@ struct clip {
}
}
};
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(clip)

/***** gamma ******/

@@ -585,6 +624,7 @@ struct gamma_grad {
return DType(math::tgamma(af) * special_functions::cephes::psi<float>(af));
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(gamma_grad)

template<>
MSHADOW_XINLINE double gamma_grad::Map<double>(double a) {
@@ -602,6 +642,7 @@ struct gammaln_grad {
return DType(special_functions::cephes::psi<float>(a));
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(gammaln_grad)

template<>
MSHADOW_XINLINE double gammaln_grad::Map<double>(double a) {
@@ -633,6 +674,7 @@ struct smooth_l1_loss {
}
}
}; // struct smooth_l1_loss
MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(smooth_l1_loss)

/* The derivative of smooth l1 loss is
* f'(x) = sigma^2 * x, |x| < 1 / sigma^2
@@ -654,6 +696,7 @@ struct smooth_l1_gradient {
}
}
}; // struct smooth_l1_derivative
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(smooth_l1_gradient)

/*! \brief product reducer */
struct product {
@@ -755,6 +798,7 @@ struct nansum_grad {
return isnan_typed::IsNan(a) ? DType(0) : DType(1);
}
};
MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(nansum_grad)

/*! \brief product reducer that ignores NaN values in the input */
struct nanprod {
@@ -791,7 +835,7 @@ struct nanprod_grad {
return isnan_typed::IsNan(a) ? DType(0) : b / a;
}
};

MSHADOW_OP_DECLARE_TUNABLE_BACKWARD(nanprod_grad)
} // namespace mshadow_op
} // namespace op
} // namespace mxnet
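To make the new macro plumbing concrete, here is a hedged expansion sketch (not MXNet source: square_demo is a made-up operator, and MSHADOW_XINLINE plus the tuning-registration macro are stubbed so the example builds standalone). It shows what MXNET_UNARY_MATH_OP(name, expr) now emits: the operator struct, then the tunable-op declaration that briefly closes the mshadow_op namespace to register the op via MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD from mxnet_op.h, and reopens it.

// expansion_sketch.cc -- hedged illustration, not part of this PR.
#include <cstdio>

#define MSHADOW_XINLINE inline                    // stand-in for the mshadow macro
#define MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD(op)  // no-op stub for the real tuning hook

namespace mshadow_op {

// Struct emitted by MXNET_UNARY_MATH_OP(square_demo, a * a):
struct square_demo {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a) {
    return DType(a * a);
  }
};
// Expansion of the trailing MSHADOW_OP_DECLARE_TUNABLE_FWD_AND_BWD(square_demo):
// leave the namespace, register the op, re-enter the namespace.
} MXNET_TUNABLE_MSHADOW_OP_FWD_AND_BWD(mshadow_op::square_demo) namespace mshadow_op {

}  // namespace mshadow_op

int main() {
  std::printf("%g\n", static_cast<double>(mshadow_op::square_demo::Map(3.0f)));  // prints 9
  return 0;
}

The same pattern applies to MSHADOW_OP_DECLARE_TUNABLE_BACKWARD, which forwards to MXNET_TUNABLE_MSHADOW_OP_BACKWARD for gradient-only operators such as log10_grad above.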