Skip to content

Commit

Permalink
use mkl sparse matrix to improve performance (apache#14492)
Browse files Browse the repository at this point in the history
* use mkl sparse matrix to improve performance

* fix build fail issue

* add 3rdparty/sparse-matrix in Makefile

* add macro for variable

* fix lib not found error

* fix gpu R test error

* fix Mac build error

* add lib/libsparse_matrix.so to CI

* fix indentation

* retrigger CI
  • Loading branch information
triplekings authored and eric-haibin-lin committed Apr 13, 2019
1 parent 1c49e40 commit c437d5b
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 3 deletions.
21 changes: 21 additions & 0 deletions 3rdparty/sparse-matrix/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Builds libsparse_matrix.so, a small shared library that wraps Intel MKL
# sparse BLAS calls (see sparse_matrix.cc).
#
# MKLROOT defaults to /opt/intel/mkl. Pass USE_INTEL_PATH=<dir> on the make
# command line to use <dir>/mkl instead.
CC = g++
C = gcc
MKLROOT = /opt/intel/mkl

ifneq ($(USE_INTEL_PATH),)
MKLROOT = $(USE_INTEL_PATH)/mkl
endif

# Take the headers from the same MKL tree the static libraries are linked
# from, so that a USE_INTEL_PATH override applies to both.
CFLAGS = -fpic -O2 -I$(MKLROOT)/include -c -Wall -Werror -DMKL_ILP64 -m64 -std=c++11
LDFLAGS = -Wl,--start-group -L${MKLROOT}/../compiler/lib/intel64 ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl

# Neither target produces a file of its own name.
.PHONY: default clean

default: libsparse_matrix.so

libsparse_matrix.so: sparse_matrix.o
	$(CC) -shared -o libsparse_matrix.so sparse_matrix.o $(LDFLAGS)

sparse_matrix.o: sparse_matrix.cc sparse_matrix.h
	$(CC) $(CFLAGS) sparse_matrix.cc

clean:
	$(RM) libsparse_matrix.so *.o *~
45 changes: 45 additions & 0 deletions 3rdparty/sparse-matrix/sparse_matrix.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <iostream>
#include <string>
#include <fstream>
#include <mkl_spblas.h>
#include "sparse_matrix.h"



/*!
 * \brief Multiply a CSR sparse matrix by a dense row-major matrix using the
 *        MKL inspector-executor sparse BLAS: y = A * X.
 *
 * \param rows_start CSR row-pointer array of A (zero-based); entry i + 1 is
 *                   used as the end of row i, so it must hold rows + 1 entries.
 * \param col_indx   CSR column indices of A (zero-based).
 * \param values     Non-zero values of A.
 * \param X          Dense right-hand matrix, row-major, leading dimension X_columns.
 * \param y          Dense output matrix, row-major, leading dimension X_columns.
 *                   It is written with beta == 0, i.e. not accumulated into.
 * \param rows       Number of rows of A.
 * \param cols       Number of columns of A.
 * \param X_columns  Number of columns of X and of y.
 * \return true on success; false if MKL reports an error, in which case the
 *         MKL status code is printed to stdout.
 */
bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx,
                      float* values, float* X, float* y,
                      int rows, int cols, int X_columns)
{
  const sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;
  const sparse_layout_t layout = SPARSE_LAYOUT_ROW_MAJOR;
  const sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE;
  const float one = 1.0f;
  const float zero = 0.0f;
  sparse_matrix_t A = nullptr;

  // MKL takes separate row-start and row-end arrays; with a single row-pointer
  // array, row i ends where row i + 1 starts, so the end array is the same
  // buffer shifted by one element.
  MKL_INT* rows_end = rows_start + 1;
  sparse_status_t status = mkl_sparse_s_create_csr(&A, indexing, rows, cols,
                                                   rows_start, rows_end,
                                                   col_indx, values);
  if (status != SPARSE_STATUS_SUCCESS)
  {
    std::cout << "mkl_sparse_s_create_csr status :" << status << std::endl;
    return false;
  }

  struct matrix_descr descrA;
  descrA.type = SPARSE_MATRIX_TYPE_GENERAL;

  status = mkl_sparse_s_mm(operation, one, A, descrA, layout,
                           X, X_columns, X_columns, zero, y, X_columns);

  // Release the handle before inspecting the status so that it is not leaked
  // when the multiplication fails.
  mkl_sparse_destroy(A);

  if (status != SPARSE_STATUS_SUCCESS)
  {
    std::cout << "mkl_sparse_s_mm status :" << status << std::endl;
    return false;
  }

  return true;
}
48 changes: 48 additions & 0 deletions 3rdparty/sparse-matrix/sparse_matrix.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
#define MXNET_OPERATOR_SPARSE_MATRIX_INL_H_


// 64-bit integer type names used for the sparse index arrays. MSVC (when it is
// not the Intel compiler in MSVC mode) uses its native __int64 spelling; every
// other toolchain uses long long.
#if !defined(__INTEL_COMPILER) && defined(_MSC_VER)
#define SP_INT64 __int64
#define SP_UINT64 unsigned __int64
#else
#define SP_INT64 long long int
#define SP_UINT64 unsigned long long int
#endif


// Symbol export annotations for the shared library.
//   SPM_API_PUBLIC - marks a symbol as part of the library's public interface.
//   SPM_API_LOCAL  - marks a symbol as internal to the library.
#if defined(_WIN32) || defined(__CYGWIN__)
  // Windows / Cygwin: export while building the DLL (BUILDING_DLL defined),
  // import otherwise. GCC-style and MSVC-style spellings are both handled.
  #if defined(BUILDING_DLL) && defined(__GNUC__)
    #define SPM_API_PUBLIC __attribute__ ((dllexport))
  #elif defined(BUILDING_DLL)
    #define SPM_API_PUBLIC __declspec(dllexport)
  #elif defined(__GNUC__)
    #define SPM_API_PUBLIC __attribute__ ((dllimport))
  #else
    #define SPM_API_PUBLIC __declspec(dllimport)
  #endif
  #define SPM_API_LOCAL
#elif __GNUC__ >= 4
  // GCC 4 and later: use ELF-style visibility attributes.
  #define SPM_API_PUBLIC __attribute__ ((visibility ("default")))
  #define SPM_API_LOCAL __attribute__ ((visibility ("hidden")))
#else
  // Anything else: no annotation.
  #define SPM_API_PUBLIC
  #define SPM_API_LOCAL
#endif



// Exported entry point with C linkage. Computes y = A * X, where A is a
// float32 CSR matrix of shape (rows, cols) given by rows_start / col_indx /
// values, and X and y are dense row-major matrices with X_columns columns.
// Returns true on success and false if the underlying MKL calls fail.
extern "C"
{
  SPM_API_PUBLIC bool mkl_DotCsrDnsDns(SP_INT64* rows_start,
                                       SP_INT64* col_indx,
                                       float* values,
                                       float* X,
                                       float* y,
                                       int rows,
                                       int cols,
                                       int X_columns);
}

#endif //MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
34 changes: 34 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ ifeq ($(USE_MKLDNN), 1)
LDFLAGS += -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,'$${ORIGIN}'
endif


# setup opencv
ifeq ($(USE_OPENCV), 1)
CFLAGS += -DMXNET_USE_OPENCV=1
Expand Down Expand Up @@ -410,6 +411,14 @@ ifeq ($(USE_DIST_KVSTORE), 1)
LDFLAGS += $(PS_LDFLAGS_A)
endif

#sparse-matrix
# SPARSE_MATRIX_DIR is defined regardless of USE_BLAS because the `clean`
# recipes `cd $(SPARSE_MATRIX_DIR)` unconditionally; with an empty value that
# `cd` would switch to $$HOME and run `make clean` there.
SPARSE_MATRIX_DIR = $(ROOTDIR)/3rdparty/sparse-matrix
# The MKL-backed sparse helper library is only built and linked for MKL builds.
ifeq ($(USE_BLAS), mkl)
LIB_DEP += $(SPARSE_MATRIX_DIR)/libsparse_matrix.so
CFLAGS += -I$(SPARSE_MATRIX_DIR)
LDFLAGS += -L$(SPARSE_MATRIX_DIR) -lsparse_matrix -Wl,-rpath,'$${ORIGIN}'
endif

.PHONY: clean all extra-packages test lint docs clean_all rcpplint rcppexport roxygen\
cython2 cython3 cython cyclean

Expand Down Expand Up @@ -547,11 +556,30 @@ ifeq ($(UNAME_S), Darwin)
endif
endif

ifeq ($(USE_BLAS), mkl)
ifeq ($(UNAME_S), Darwin)
install_name_tool -change '@rpath/libsparse_matrix.dylib' '@loader_path/libsparse_matrix.dylib' $@
endif
endif

$(PS_PATH)/build/libps.a: PSLITE

PSLITE:
$(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps

# Build rule for the MKL sparse-matrix helper library (MKL builds only).
# The library file depends on the SPARSE_MATRIX target, which runs the
# sub-make in $(SPARSE_MATRIX_DIR) and then copies the resulting shared
# library into $(ROOTDIR)/lib.
ifeq ($(USE_BLAS), mkl)
$(SPARSE_MATRIX_DIR)/libsparse_matrix.so: SPARSE_MATRIX

# When USE_INTEL_PATH is NONE the sub-make is run without it, so the sub-make
# uses its own default MKL location; otherwise the path is forwarded.
SPARSE_MATRIX:
ifeq ($(USE_INTEL_PATH), NONE)
$(MAKE) -C $(SPARSE_MATRIX_DIR)
else
$(MAKE) -C $(SPARSE_MATRIX_DIR) USE_INTEL_PATH=$(USE_INTEL_PATH)
endif
mkdir -p $(ROOTDIR)/lib
cp $(SPARSE_MATRIX_DIR)/libsparse_matrix.so $(ROOTDIR)/lib/
endif

$(DMLC_CORE)/libdmlc.a: DMLCCORE

DMLCCORE:
Expand Down Expand Up @@ -628,6 +656,10 @@ rpkg:
cp -rf lib/libmklml_intel.so R-package/inst/libs; \
fi

if [ -e "lib/libsparse_matrix.so" ]; then \
cp -rf lib/libsparse_matrix.so R-package/inst/libs; \
fi

mkdir -p R-package/inst/include
cp -rl include/* R-package/inst/include
Rscript -e "if(!require(devtools)){install.packages('devtools', repo = 'https://cloud.r-project.org/')}"
Expand Down Expand Up @@ -673,6 +705,7 @@ clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
Expand All @@ -683,6 +716,7 @@ clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
endif
Expand Down
2 changes: 1 addition & 1 deletion ci/jenkins/Jenkins_steps.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/li
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, lib/libsparse_matrix.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_tensorrt_lib = 'build/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*'
mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*'
Expand Down
28 changes: 26 additions & 2 deletions src/operator/tensor/dot-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
#ifdef __CUDACC__
#include "./dot-inl.cuh"
#endif // __CUDACC__

#if (MSHADOW_USE_MKL == 1)
#include "sparse_matrix.h"
#endif
namespace mxnet {
namespace op {

Expand Down Expand Up @@ -775,13 +777,35 @@ inline void DotCsrDnsDnsImpl(const OpContext& ctx,
}

using nnvm::dim_t;

#if (MSHADOW_USE_MKL == 1)
TShape lhs_shape = lhs.shape();
TShape rhs_shape = rhs.shape_;
#endif
const TBlob data_l = lhs.data();
const TBlob indptr_l = lhs.aux_data(csr::kIndPtr);
const TBlob col_idx_l = lhs.aux_data(csr::kIdx);
const TBlob& data_r = rhs;
const TBlob data_out = *ret;

#if (MSHADOW_USE_MKL == 1)
if (data_l.type_flag_ == mshadow::kFloat32
&& indptr_l.type_flag_ == mshadow::kInt64
&& col_idx_l.type_flag_ == mshadow::kInt64
&& !trans_lhs) {
bool ret = mkl_DotCsrDnsDns(static_cast<SP_INT64*>(indptr_l.dptr_),
static_cast<SP_INT64*>(col_idx_l.dptr_),
data_l.dptr<float>(),
data_r.dptr<float>(),
data_out.dptr<float>(),
lhs_shape[0],
lhs_shape[1],
rhs_shape[1]);
if (ret) {
return;
}
}
#endif

MSHADOW_SGL_DBL_TYPE_SWITCH(data_l.type_flag_, DType, { // data type
MSHADOW_IDX_TYPE_SWITCH(indptr_l.type_flag_, IType, { // indptr type
MSHADOW_IDX_TYPE_SWITCH(col_idx_l.type_flag_, CType, { // col idx type
Expand Down

0 comments on commit c437d5b

Please sign in to comment.