From a4ea3522b6f2fdc6a68b6b247eef9dee95579fc1 Mon Sep 17 00:00:00 2001 From: PatricZhao Date: Thu, 25 Apr 2019 20:30:25 +0800 Subject: [PATCH] Revert "use mkl sparse matrix to improve performance (#14492)" This reverts commit c437d5b789c9d9892522a9c32ef6e6f20785a3a2. --- 3rdparty/sparse-matrix/Makefile | 21 ----------- 3rdparty/sparse-matrix/sparse_matrix.cc | 45 ----------------------- 3rdparty/sparse-matrix/sparse_matrix.h | 48 ------------------------- Makefile | 34 ------------------ ci/jenkins/Jenkins_steps.groovy | 2 +- src/operator/tensor/dot-inl.h | 28 ++------------- 6 files changed, 3 insertions(+), 175 deletions(-) delete mode 100644 3rdparty/sparse-matrix/Makefile delete mode 100644 3rdparty/sparse-matrix/sparse_matrix.cc delete mode 100644 3rdparty/sparse-matrix/sparse_matrix.h diff --git a/3rdparty/sparse-matrix/Makefile b/3rdparty/sparse-matrix/Makefile deleted file mode 100644 index 214312f6586c..000000000000 --- a/3rdparty/sparse-matrix/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -CC = g++ -C = gcc -MKLROOT = /opt/intel/mkl - -ifneq ($(USE_INTEL_PATH),) - MKLROOT = $(USE_INTEL_PATH)/mkl -endif - -CFLAGS = -fpic -O2 -I/opt/intel/mkl/include -c -Wall -Werror -DMKL_ILP64 -m64 -std=c++11 -LDFLAGS = -Wl,--start-group -L${MKLROOT}/../compiler/lib/intel64 ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl - -default: libsparse_matrix.so - -libsparse_matrix.so: sparse_matrix.o - $(CC) -shared -o libsparse_matrix.so sparse_matrix.o $(LDFLAGS) - -sparse_matrix.o: sparse_matrix.cc sparse_matrix.h - $(CC) $(CFLAGS) sparse_matrix.cc - -clean: - $(RM) libsparse_matrix.so *.o *~ diff --git a/3rdparty/sparse-matrix/sparse_matrix.cc b/3rdparty/sparse-matrix/sparse_matrix.cc deleted file mode 100644 index fa362f0f8a18..000000000000 --- a/3rdparty/sparse-matrix/sparse_matrix.cc +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include -#include -#include -#include "sparse_matrix.h" - - - -bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx, - float* values, float* X, float* y, - int rows, int cols, int X_columns) -{ - - sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO; - sparse_status_t status; - sparse_matrix_t A = NULL; - sparse_layout_t layout = SPARSE_LAYOUT_ROW_MAJOR; - float one, zero; - one = (float)1.0; - zero = (float)0.0; - - MKL_INT* rows_end = rows_start + 1; - status = mkl_sparse_s_create_csr(&A, indexing, rows, cols, rows_start, rows_end, col_indx, values); - - if (status != SPARSE_STATUS_SUCCESS) - { - std::cout << "mkl_sparse_s_create_csr status :" << status << std::endl; - return false; - } - sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE; - struct matrix_descr descrA; - descrA.type = SPARSE_MATRIX_TYPE_GENERAL; - - status = mkl_sparse_s_mm(operation, one, A, descrA, layout, X, X_columns, X_columns, zero, y, X_columns); - if (status != SPARSE_STATUS_SUCCESS) - { - std::cout << "mkl_sparse_s_create_csr status :" << status << std::endl; - return false; - } - - mkl_sparse_destroy(A); - - return true; - -} diff --git a/3rdparty/sparse-matrix/sparse_matrix.h b/3rdparty/sparse-matrix/sparse_matrix.h deleted file mode 100644 index 93054a80b374..000000000000 --- a/3rdparty/sparse-matrix/sparse_matrix.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef MXNET_OPERATOR_SPARSE_MATRIX_INL_H_ -#define MXNET_OPERATOR_SPARSE_MATRIX_INL_H_ - - -#if (!defined(__INTEL_COMPILER)) & defined(_MSC_VER) -#define SP_INT64 __int64 -#define SP_UINT64 unsigned __int64 -#else -#define SP_INT64 long long int -#define SP_UINT64 unsigned long long int -#endif - - -#if defined _WIN32 || defined __CYGWIN__ - #ifdef BUILDING_DLL - #ifdef __GNUC__ - #define SPM_API_PUBLIC __attribute__ ((dllexport)) - #else - #define SPM_API_PUBLIC __declspec(dllexport) // Note: actually gcc seems to also supports this syntax. - #endif - #else - #ifdef __GNUC__ - #define SPM_API_PUBLIC __attribute__ ((dllimport)) - #else - #define SPM_API_PUBLIC __declspec(dllimport) // Note: actually gcc seems to also supports this syntax. - #endif - #endif - #define SPM_API_LOCAL -#else - #if __GNUC__ >= 4 - #define SPM_API_PUBLIC __attribute__ ((visibility ("default"))) - #define SPM_API_LOCAL __attribute__ ((visibility ("hidden"))) - #else - #define SPM_API_PUBLIC - #define SPM_API_LOCAL - #endif -#endif - - - -extern "C" -{ - extern SPM_API_PUBLIC bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx, - float* values, float* X, float* y, int rows, int cols, int X_columns); - -} - -#endif //MXNET_OPERATOR_SPARSE_MATRIX_INL_H_ \ No newline at end of file diff --git a/Makefile b/Makefile index 29cfd573665c..981a86830ba9 100644 --- a/Makefile +++ b/Makefile @@ -144,7 +144,6 @@ ifeq ($(USE_MKLDNN), 1) LDFLAGS += -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,'$${ORIGIN}' endif - # setup opencv ifeq ($(USE_OPENCV), 1) CFLAGS += -DMXNET_USE_OPENCV=1 @@ -416,14 +415,6 @@ ifeq ($(USE_DIST_KVSTORE), 1) LDFLAGS += $(PS_LDFLAGS_A) endif -#sparse-matrix -ifeq ($(USE_BLAS), mkl) - SPARSE_MATRIX_DIR = $(ROOTDIR)/3rdparty/sparse-matrix - LIB_DEP += $(SPARSE_MATRIX_DIR)/libsparse_matrix.so - CFLAGS += -I$(SPARSE_MATRIX_DIR) - LDFLAGS += -L$(SPARSE_MATRIX_DIR) -lsparse_matrix -Wl,-rpath,'$${ORIGIN}' -endif - .PHONY: clean all extra-packages test lint docs clean_all rcpplint rcppexport roxygen\ cython2 cython3 cython cyclean @@ -561,30 +552,11 @@ ifeq ($(UNAME_S), Darwin) endif endif -ifeq ($(USE_BLAS), mkl) -ifeq ($(UNAME_S), Darwin) - install_name_tool -change '@rpath/libsparse_matrix.dylib' '@loader_path/libsparse_matrix.dylib' $@ -endif -endif - $(PS_PATH)/build/libps.a: PSLITE PSLITE: $(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps -ifeq ($(USE_BLAS), mkl) -$(SPARSE_MATRIX_DIR)/libsparse_matrix.so: SPARSE_MATRIX - -SPARSE_MATRIX: -ifeq ($(USE_INTEL_PATH), NONE) - $(MAKE) -C $(SPARSE_MATRIX_DIR) -else - $(MAKE) -C $(SPARSE_MATRIX_DIR) USE_INTEL_PATH=$(USE_INTEL_PATH) -endif - mkdir -p $(ROOTDIR)/lib - cp $(SPARSE_MATRIX_DIR)/libsparse_matrix.so $(ROOTDIR)/lib/ -endif - $(DMLC_CORE)/libdmlc.a: DMLCCORE DMLCCORE: @@ -661,10 +633,6 @@ rpkg: cp -rf lib/libmklml_intel.so R-package/inst/libs; \ fi - if [ -e "lib/libsparse_matrix.so" ]; then \ - cp -rf lib/libsparse_matrix.so R-package/inst/libs; \ - fi - mkdir -p R-package/inst/include cp -rl include/* R-package/inst/include Rscript -e "if(!require(devtools)){install.packages('devtools', repo = 'https://cloud.r-project.org/')}" @@ -710,7 +678,6 @@ clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN) (cd scala-package && mvn clean) || true cd $(DMLC_CORE); $(MAKE) clean; cd - cd $(PS_PATH); $(MAKE) clean; cd - - cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd - cd $(NNVM_PATH); $(MAKE) clean; cd - cd $(AMALGAMATION_PATH); $(MAKE) clean; cd - $(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS)) @@ -721,7 +688,6 @@ clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN) (cd scala-package && mvn clean) || true cd $(DMLC_CORE); $(MAKE) clean; cd - cd $(PS_PATH); $(MAKE) clean; cd - - cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd - cd $(NNVM_PATH); $(MAKE) clean; cd - cd $(AMALGAMATION_PATH); $(MAKE) clean; cd - endif diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy index 23230ac0442f..12c1a0afc894 100644 --- a/ci/jenkins/Jenkins_steps.groovy +++ b/ci/jenkins/Jenkins_steps.groovy @@ -33,7 +33,7 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/li // mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default. mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests' mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0' -mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, lib/libsparse_matrix.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a' +mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a' mx_tensorrt_lib = 'build/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so' mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*' mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*' diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h index 318254b26b9f..f81eb9c04f3a 100644 --- a/src/operator/tensor/dot-inl.h +++ b/src/operator/tensor/dot-inl.h @@ -38,9 +38,7 @@ #ifdef __CUDACC__ #include "./dot-inl.cuh" #endif // __CUDACC__ -#if (MSHADOW_USE_MKL == 1) -#include "sparse_matrix.h" -#endif + namespace mxnet { namespace op { @@ -777,35 +775,13 @@ inline void DotCsrDnsDnsImpl(const OpContext& ctx, } using nnvm::dim_t; -#if (MSHADOW_USE_MKL == 1) - TShape lhs_shape = lhs.shape(); - TShape rhs_shape = rhs.shape_; -#endif + const TBlob data_l = lhs.data(); const TBlob indptr_l = lhs.aux_data(csr::kIndPtr); const TBlob col_idx_l = lhs.aux_data(csr::kIdx); const TBlob& data_r = rhs; const TBlob data_out = *ret; -#if (MSHADOW_USE_MKL == 1) - if (data_l.type_flag_ == mshadow::kFloat32 - && indptr_l.type_flag_ == mshadow::kInt64 - && col_idx_l.type_flag_ == mshadow::kInt64 - && !trans_lhs) { - bool ret = mkl_DotCsrDnsDns(static_cast(indptr_l.dptr_), - static_cast(col_idx_l.dptr_), - data_l.dptr(), - data_r.dptr(), - data_out.dptr(), - lhs_shape[0], - lhs_shape[1], - rhs_shape[1]); - if (ret) { - return; - } - } -#endif - MSHADOW_SGL_DBL_TYPE_SWITCH(data_l.type_flag_, DType, { // data type MSHADOW_IDX_TYPE_SWITCH(indptr_l.type_flag_, IType, { // indptr type MSHADOW_IDX_TYPE_SWITCH(col_idx_l.type_flag_, CType, { // col idx type