Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

use mkl sparse matrix to improve performance #14492

Merged
merged 11 commits into from
Apr 13, 2019
21 changes: 21 additions & 0 deletions 3rdparty/sparse-matrix/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Builds libsparse_matrix.so, a small shared library wrapping Intel MKL's
# sparse BLAS routines.
#
# MKLROOT defaults to /opt/intel/mkl. Passing USE_INTEL_PATH=<dir> on the
# command line re-roots it at <dir>/mkl.
CC = g++
C = gcc
MKLROOT = /opt/intel/mkl

ifneq ($(USE_INTEL_PATH),)
MKLROOT = $(USE_INTEL_PATH)/mkl
endif

# The include path follows MKLROOT so that the headers always come from the
# same MKL installation as the static libraries named in LDFLAGS.
# -DMKL_ILP64 pairs with libmkl_intel_ilp64.a below (64-bit integer interface).
CFLAGS = -fpic -O2 -I$(MKLROOT)/include -c -Wall -Werror -DMKL_ILP64 -m64 -std=c++11
LDFLAGS = -Wl,--start-group -L${MKLROOT}/../compiler/lib/intel64 ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a ${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl

# Neither target names a real file; keep them working even if a file called
# "default" or "clean" appears in this directory.
.PHONY: default clean

default: libsparse_matrix.so

libsparse_matrix.so: sparse_matrix.o
	$(CC) -shared -o libsparse_matrix.so sparse_matrix.o $(LDFLAGS)

sparse_matrix.o: sparse_matrix.cc sparse_matrix.h
	$(CC) $(CFLAGS) sparse_matrix.cc

clean:
	$(RM) libsparse_matrix.so *.o *~
45 changes: 45 additions & 0 deletions 3rdparty/sparse-matrix/sparse_matrix.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#include <iostream>
#include <string>
#include <fstream>
#include <mkl_spblas.h>
#include "sparse_matrix.h"



/*!
 * \brief Multiply a CSR sparse matrix by a dense matrix using Intel MKL.
 *
 * Wraps the CSR arrays in an MKL sparse handle (zero-based indexing) and calls
 * mkl_sparse_s_mm with alpha = 1, beta = 0, no transpose and row-major layout,
 * writing the product into y.
 *
 * \param rows_start CSR row-pointer array; the row-end array passed to MKL is
 *                   rows_start + 1.
 * \param col_indx   CSR column-index array.
 * \param values     CSR non-zero values (single precision).
 * \param X          dense right-hand side; X_columns is used as both its
 *                   column count and its leading dimension.
 * \param y          dense output; X_columns is used as its leading dimension.
 * \param rows       number of rows of the sparse matrix.
 * \param cols       number of columns of the sparse matrix.
 * \param X_columns  number of columns of X and y.
 * \return true when both MKL calls report SPARSE_STATUS_SUCCESS, false otherwise.
 */
bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it land in the 3rdparty directory, instead of src/operator/?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there future plan to support other sparse matrix ops, like dense + csr, csr + csr, csr * csr (elemwise)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eric-haibin-lin
If you request it, yes. For example, MKL provides
mkl_sparse_spmm (s, d, c, z variants), which computes the product of two sparse matrices and stores
the result as a sparse matrix.
This patch is being merged first because Wide & Deep uses a sparse-times-dense matrix product.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it land in the 3rdparty directory, instead of src/operator/?

This is a quick solution for the Wide & Deep model. Moving the code into src/operator requires changing code in mshadow, and more tests are needed. I will do that as the next step.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation. Why does mshadow have to be changed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mshadow uses the 32-bit integer (LP64) MKL interface. The link below describes the MKL link settings:
https://software.intel.com/en-us/articles/intel-mkl-link-line-advisor
The makefile would need to change as follows:
diff --git a/make/mshadow.mk b/make/mshadow.mk
index 86155ea..d0b731e 100644
--- a/make/mshadow.mk
+++ b/make/mshadow.mk
@@ -82,7 +82,7 @@ ifneq ($(USE_INTEL_PATH), NONE)
MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/compiler/lib/intel64
MSHADOW_LDFLAGS += -L$(USE_INTEL_PATH)/lib/intel64
endif

- MSHADOW_CFLAGS += -I$(USE_INTEL_PATH)/mkl/include
+ MSHADOW_CFLAGS += -I$(USE_INTEL_PATH)/mkl/include -DMKL_ILP64 -m64
    endif
    ifneq ($(USE_STATIC_MKL), NONE)
    ifeq ($(USE_INTEL_PATH), NONE)
    @@ -90,7 +90,7 @@ ifeq ($(USE_INTEL_PATH), NONE)
    else
    MKLROOT = $(USE_INTEL_PATH)/mkl
    endif
- MSHADOW_LDFLAGS += -L${MKLROOT}/../compiler/lib/intel64 -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_lp64.a ${MKLROOT}/lib/intel64/libmkl_core.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -liomp5 -ldl -lpthread -lm
+ MSHADOW_LDFLAGS += -L${MKLROOT}/../compiler/lib/intel64 -Wl,--start-group ${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a ${MKLROOT}/lib/intel64/libmkl_core.a ${MKLROOT}/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -liomp5 -ldl -lpthread -lm
    else

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look forward to the followup PR

float* values, float* X, float* y,
int rows, int cols, int X_columns)
{
  // Scaling factors for mkl_sparse_s_mm: with alpha = 1 and beta = 0 the call
  // overwrites y with the plain product of the sparse matrix and X.
  const float one = 1.0f;
  const float zero = 0.0f;

  // In the 3-array CSR layout the end of row i is the start of row i + 1, so
  // the row-pointer array shifted by one element serves as the row-end array.
  MKL_INT* rows_end = rows_start + 1;

  // Wrap the caller's CSR arrays in an MKL sparse-matrix handle.
  sparse_matrix_t A = nullptr;
  sparse_status_t status = mkl_sparse_s_create_csr(&A, SPARSE_INDEX_BASE_ZERO, rows, cols,
                                                   rows_start, rows_end, col_indx, values);
  if (status != SPARSE_STATUS_SUCCESS)
  {
    std::cout << "mkl_sparse_s_create_csr status :" << status << std::endl;
    return false;
  }

  struct matrix_descr descrA;
  descrA.type = SPARSE_MATRIX_TYPE_GENERAL;

  // Row-major dense operands; X_columns is both the column count and the
  // leading dimension of X and y.
  status = mkl_sparse_s_mm(SPARSE_OPERATION_NON_TRANSPOSE, one, A, descrA,
                           SPARSE_LAYOUT_ROW_MAJOR, X, X_columns, X_columns, zero, y, X_columns);

  // Release the handle before inspecting the status so that a failed multiply
  // does not leak it.
  mkl_sparse_destroy(A);

  if (status != SPARSE_STATUS_SUCCESS)
  {
    std::cout << "mkl_sparse_s_mm status :" << status << std::endl;
    return false;
  }

  return true;
}
48 changes: 48 additions & 0 deletions 3rdparty/sparse-matrix/sparse_matrix.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
#define MXNET_OPERATOR_SPARSE_MATRIX_INL_H_

// 64-bit integer types used for the CSR index arrays. MSVC (when it is not the
// Intel compiler front end) spells them __int64; everything else uses long long.
#if !defined(__INTEL_COMPILER) && defined(_MSC_VER)
  #define SP_INT64 __int64
  #define SP_UINT64 unsigned __int64
#else
  #define SP_INT64 long long int
  #define SP_UINT64 unsigned long long int
#endif

// Symbol-visibility helpers:
//   SPM_API_PUBLIC marks a symbol as exported from (or imported into) the
//   shared library; SPM_API_LOCAL marks it as hidden where the toolchain
//   supports that.
#if defined _WIN32 || defined __CYGWIN__
  #ifdef BUILDING_DLL
    #ifdef __GNUC__
      #define SPM_API_PUBLIC __attribute__ ((dllexport))
    #else
      #define SPM_API_PUBLIC __declspec(dllexport) // Note: actually gcc seems to also supports this syntax.
    #endif
  #else
    #ifdef __GNUC__
      #define SPM_API_PUBLIC __attribute__ ((dllimport))
    #else
      #define SPM_API_PUBLIC __declspec(dllimport) // Note: actually gcc seems to also supports this syntax.
    #endif
  #endif
  #define SPM_API_LOCAL
#else
  #if __GNUC__ >= 4
    #define SPM_API_PUBLIC __attribute__ ((visibility ("default")))
    #define SPM_API_LOCAL __attribute__ ((visibility ("hidden")))
  #else
    #define SPM_API_PUBLIC
    #define SPM_API_LOCAL
  #endif
#endif

extern "C"
{
/*!
 * \brief Multiply a CSR sparse matrix by a dense matrix, writing the result to y.
 *
 * \param rows_start CSR row-pointer array (64-bit indices).
 * \param col_indx   CSR column-index array (64-bit indices).
 * \param values     CSR non-zero values (single precision).
 * \param X          dense right-hand-side matrix with X_columns columns.
 * \param y          dense output matrix with X_columns columns.
 * \param rows       number of rows of the sparse matrix.
 * \param cols       number of columns of the sparse matrix.
 * \param X_columns  number of columns of X and y.
 * \return true on success, false otherwise.
 */
extern SPM_API_PUBLIC bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx,
                                            float* values, float* X, float* y,
                                            int rows, int cols, int X_columns);

}

#endif //MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
34 changes: 34 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ ifeq ($(USE_MKLDNN), 1)
LDFLAGS += -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,'$${ORIGIN}'
endif


# setup opencv
ifeq ($(USE_OPENCV), 1)
CFLAGS += -DMXNET_USE_OPENCV=1
Expand Down Expand Up @@ -410,6 +411,14 @@ ifeq ($(USE_DIST_KVSTORE), 1)
LDFLAGS += $(PS_LDFLAGS_A)
endif

#sparse-matrix
# SPARSE_MATRIX_DIR is defined unconditionally: the `clean` recipes run
# `cd $(SPARSE_MATRIX_DIR); $(MAKE) clean` without a visible USE_BLAS guard, and
# with an empty value that `cd` would go to $HOME and run `make clean` there.
SPARSE_MATRIX_DIR = $(ROOTDIR)/3rdparty/sparse-matrix
ifeq ($(USE_BLAS), mkl)
LIB_DEP += $(SPARSE_MATRIX_DIR)/libsparse_matrix.so
CFLAGS += -I$(SPARSE_MATRIX_DIR)
LDFLAGS += -L$(SPARSE_MATRIX_DIR) -lsparse_matrix -Wl,-rpath,'$${ORIGIN}'
endif

.PHONY: clean all extra-packages test lint docs clean_all rcpplint rcppexport roxygen\
cython2 cython3 cython cyclean

Expand Down Expand Up @@ -547,11 +556,30 @@ ifeq ($(UNAME_S), Darwin)
endif
endif

ifeq ($(USE_BLAS), mkl)
ifeq ($(UNAME_S), Darwin)
install_name_tool -change '@rpath/libsparse_matrix.dylib' '@loader_path/libsparse_matrix.dylib' $@
endif
endif

$(PS_PATH)/build/libps.a: PSLITE

PSLITE:
$(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps

# Build the MKL sparse-matrix wrapper library (only when USE_BLAS is mkl).
# The .so target has no recipe of its own; it is produced by the SPARSE_MATRIX
# target it depends on, which runs the sub-make in $(SPARSE_MATRIX_DIR) and then
# copies the resulting library into $(ROOTDIR)/lib.
ifeq ($(USE_BLAS), mkl)
$(SPARSE_MATRIX_DIR)/libsparse_matrix.so: SPARSE_MATRIX

# USE_INTEL_PATH is forwarded to the sub-make unless it is the literal NONE,
# in which case the sub-make is run without it.
SPARSE_MATRIX:
ifeq ($(USE_INTEL_PATH), NONE)
$(MAKE) -C $(SPARSE_MATRIX_DIR)
else
$(MAKE) -C $(SPARSE_MATRIX_DIR) USE_INTEL_PATH=$(USE_INTEL_PATH)
endif
mkdir -p $(ROOTDIR)/lib
cp $(SPARSE_MATRIX_DIR)/libsparse_matrix.so $(ROOTDIR)/lib/
endif

$(DMLC_CORE)/libdmlc.a: DMLCCORE

DMLCCORE:
Expand Down Expand Up @@ -628,6 +656,10 @@ rpkg:
cp -rf lib/libmklml_intel.so R-package/inst/libs; \
fi

if [ -e "lib/libsparse_matrix.so" ]; then \
cp -rf lib/libsparse_matrix.so R-package/inst/libs; \
fi

mkdir -p R-package/inst/include
cp -rl include/* R-package/inst/include
Rscript -e "if(!require(devtools)){install.packages('devtools', repo = 'https://cloud.r-project.org/')}"
Expand Down Expand Up @@ -673,6 +705,7 @@ clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %, %/*/*.d, $(EXTRA_OPERATORS))
Expand All @@ -683,6 +716,7 @@ clean: rclean mkldnn_clean cyclean testclean $(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
endif
Expand Down
2 changes: 1 addition & 1 deletion ci/jenkins/Jenkins_steps.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/li
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, lib/libsparse_matrix.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_tensorrt_lib = 'build/libmxnet.so, lib/libnvonnxparser_runtime.so.0, lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a, build/cpp-package/example/*'
mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*'
Expand Down
28 changes: 26 additions & 2 deletions src/operator/tensor/dot-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
#ifdef __CUDACC__
#include "./dot-inl.cuh"
#endif // __CUDACC__

#if (MSHADOW_USE_MKL == 1)
#include "sparse_matrix.h"
#endif
namespace mxnet {
namespace op {

Expand Down Expand Up @@ -775,13 +777,35 @@ inline void DotCsrDnsDnsImpl(const OpContext& ctx,
}

using nnvm::dim_t;

#if (MSHADOW_USE_MKL == 1)
TShape lhs_shape = lhs.shape();
TShape rhs_shape = rhs.shape_;
#endif
const TBlob data_l = lhs.data();
const TBlob indptr_l = lhs.aux_data(csr::kIndPtr);
const TBlob col_idx_l = lhs.aux_data(csr::kIdx);
const TBlob& data_r = rhs;
const TBlob data_out = *ret;

#if (MSHADOW_USE_MKL == 1)
if (data_l.type_flag_ == mshadow::kFloat32
eric-haibin-lin marked this conversation as resolved.
Show resolved Hide resolved
&& indptr_l.type_flag_ == mshadow::kInt64
&& col_idx_l.type_flag_ == mshadow::kInt64
&& !trans_lhs) {
bool ret = mkl_DotCsrDnsDns(static_cast<SP_INT64*>(indptr_l.dptr_),
static_cast<SP_INT64*>(col_idx_l.dptr_),
data_l.dptr<float>(),
data_r.dptr<float>(),
data_out.dptr<float>(),
lhs_shape[0],
lhs_shape[1],
rhs_shape[1]);
if (ret) {
return;
}
}
#endif

MSHADOW_SGL_DBL_TYPE_SWITCH(data_l.type_flag_, DType, { // data type
MSHADOW_IDX_TYPE_SWITCH(indptr_l.type_flag_, IType, { // indptr type
MSHADOW_IDX_TYPE_SWITCH(col_idx_l.type_flag_, CType, { // col idx type
Expand Down