diff --git a/example/Makefile.openblas b/example/Makefile.openblas
new file mode 100644
index 000000000000..bd90eca3922a
--- /dev/null
+++ b/example/Makefile.openblas
@@ -0,0 +1,37 @@
+# set LD_LIBRARY_PATH
+# echo "Link mshadow with precompiled OpenBLAS"
+export OPENBLAS_ROOT=../../OpenBLAS-v0.2.13-Win64-int32
+export CC = gcc
+export CXX = g++
+export NVCC = nvcc
+export CFLAGS = -Wall -O3 -msse3 -Wno-unknown-pragmas -funroll-loops -I../ -I$(OPENBLAS_ROOT)/include -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_MKL=0 -DMSHADOW_USE_CBLAS=1 -D__APPLE__
+export LDFLAGS= -static -lpthread -lopenblas -L$(OPENBLAS_ROOT)/lib
+export NVCCFLAGS = -O3 --use_fast_math -ccbin $(CXX)
+
+# specify tensor path
+BIN = basic defop basic-matrix-dot
+OBJ =
+CUOBJ =
+CUBIN =
+.PHONY: clean all
+
+all: $(BIN) $(OBJ) $(CUBIN) $(CUOBJ)
+
+basic: basic.cpp
+defop: defop.cpp
+basic-matrix-dot: basic-matrix-dot.cpp
+
+$(BIN) :
+	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)
+
+$(OBJ) :
+	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
+
+$(CUOBJ) :
+	$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^)
+
+$(CUBIN) :
+	$(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^)
+
+clean:
+	$(RM) $(OBJ) $(BIN) $(CUBIN) $(CUOBJ) *~
diff --git a/example/basic-matrix-dot.cpp b/example/basic-matrix-dot.cpp
new file mode 100644
index 000000000000..5c5485beb238
--- /dev/null
+++ b/example/basic-matrix-dot.cpp
@@ -0,0 +1,20 @@
+// header file to use mshadow
+#include "mshadow/tensor.h"
+// this namespace contains all data structures, functions
+using namespace mshadow;
+// this namespace contains all operator overloads
+using namespace mshadow::expr;
+
+int main( void ){
+    // initialize tensor engine before using tensor operation, needed for CuBLAS
+    InitTensorEngine();
+    // allocate a 1000x1000 CPU matrix filled with 1.0 and take repeated matrix products
+    Tensor<cpu, 2> mat = NewTensor<cpu>( Shape2(1000,1000), 1.0f );
+    for (int i = 0; i < 100; i++)
+        mat = dot(mat, mat);
+    FreeSpace(mat);
+    // shutdown tensor engine after usage
+
+    ShutdownTensorEngine();
+    return 0;
+}
diff --git a/example/neuralnet/Makefile.openblas b/example/neuralnet/Makefile.openblas
new file mode 100644
index 000000000000..ef82c1115df7
--- /dev/null
+++ b/example/neuralnet/Makefile.openblas
@@ -0,0 +1,36 @@
+# set LD_LIBRARY_PATH
+# echo "Link mshadow with precompiled OpenBLAS"
+export OPENBLAS_ROOT=../../../OpenBLAS-v0.2.13-Win64-int32
+export CC = gcc
+export CXX = g++
+export NVCC = nvcc
+export CFLAGS = -Wall -O3 -msse3 -Wno-unknown-pragmas -funroll-loops -I../../ -I$(OPENBLAS_ROOT)/include -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_MKL=0 -DMSHADOW_USE_CBLAS=1 -D__APPLE__
+export LDFLAGS= -static -lpthread -lopenblas -L$(OPENBLAS_ROOT)/lib
+export NVCCFLAGS = -O3 --use_fast_math -ccbin $(CXX)
+
+# specify tensor path
+BIN = nnet convnet
+OBJ =
+CUOBJ =
+CUBIN =
+.PHONY: clean all
+
+all: $(BIN) $(OBJ) $(CUBIN) $(CUOBJ)
+
+nnet: nnet.cpp
+convnet: convnet.cpp
+
+$(BIN) :
+	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)
+
+$(OBJ) :
+	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
+
+$(CUOBJ) :
+	$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^)
+
+$(CUBIN) :
+	$(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^)
+
+clean:
+	$(RM) $(OBJ) $(BIN) $(CUBIN) $(CUOBJ) *~
diff --git a/example/neuralnet/build_openblash.sh b/example/neuralnet/build_openblash.sh
new file mode 100644
index 000000000000..dd33f2cbc07c
--- /dev/null
+++ b/example/neuralnet/build_openblash.sh
@@ -0,0 +1,3 @@
+mv nnet.cu nnet.cpp
+mv convnet.cu convnet.cpp
+make -f Makefile.openblas
\ No newline at end of file
diff --git a/example/neuralnet/convnet.cu b/example/neuralnet/convnet.cu
index 448810e126f4..de8f65b5568b 100644
--- a/example/neuralnet/convnet.cu
+++ b/example/neuralnet/convnet.cu
@@ -202,7 +202,9 @@ int main( int argc, char *argv[] ){
     // choose which version to use
     INNet *net;
     if( !strcmp( argv[1], "gpu") ) {
+#if MSHADOW_USE_CUDA==1
         net = new ConvNet<gpu>( batch_size, insize, nchannel, ksize, kstride, psize, num_out );
+#endif
     }else{
         net = new ConvNet<cpu>( batch_size, insize, nchannel, ksize, kstride, psize, num_out );
     }
diff --git a/example/neuralnet/nnet.cu b/example/neuralnet/nnet.cu
index 75c623a68d7e..a1b4dc2f67f5 100644
--- a/example/neuralnet/nnet.cu
+++ b/example/neuralnet/nnet.cu
@@ -135,7 +135,9 @@ int main( int argc, char *argv[] ){
     // choose which version to use
     INNet *net;
     if( !strcmp( argv[1], "gpu") ) {
+#if MSHADOW_USE_CUDA==1
         net = new NNet<gpu>( batch_size, num_in, num_hidden, num_out );
+#endif
     }else{
         net = new NNet<cpu>( batch_size, num_in, num_hidden, num_out );
     }