diff --git a/example/Makefile.openblas b/example/Makefile.openblas
new file mode 100644
index 000000000000..bd90eca3922a
--- /dev/null
+++ b/example/Makefile.openblas
@@ -0,0 +1,37 @@
+# set LD_LIBRARY_PATH
+# echo "Link mshadow with precompiled OpenBLAS"
+export OPENBLAS_ROOT=../../OpenBLAS-v0.2.13-Win64-int32
+export CC = gcc
+export CXX = g++
+export NVCC = nvcc
+export CFLAGS = -Wall -O3 -msse3 -Wno-unknown-pragmas -funroll-loops -I../ -I$(OPENBLAS_ROOT)/include -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_MKL=0 -DMSHADOW_USE_CBLAS=1 -D__APPLE__
+export LDFLAGS= -static -lpthread -lopenblas -L$(OPENBLAS_ROOT)/lib
+export NVCCFLAGS = -O3 --use_fast_math -ccbin $(CXX)
+
+# specify tensor path
+BIN = basic defop basic-matrix-dot
+OBJ =
+CUOBJ =
+CUBIN =
+.PHONY: clean all
+
+all: $(BIN) $(OBJ) $(CUBIN) $(CUOBJ)
+
+basic: basic.cpp
+defop: defop.cpp
+basic-matrix-dot: basic-matrix-dot.cpp
+
+$(BIN) :
+	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)
+
+$(OBJ) :
+	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
+
+$(CUOBJ) :
+	$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^)
+
+$(CUBIN) :
+	$(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^)
+
+clean:
+	$(RM) $(OBJ) $(BIN) $(CUBIN) $(CUOBJ) *~
diff --git a/example/basic-matrix-dot.cpp b/example/basic-matrix-dot.cpp
new file mode 100644
index 000000000000..5c5485beb238
--- /dev/null
+++ b/example/basic-matrix-dot.cpp
@@ -0,0 +1,20 @@
+// header file to use mshadow
+#include "mshadow/tensor.h"
+// this namespace contains all data structures, functions
+using namespace mshadow;
+// this namespace contains all operator overloads
+using namespace mshadow::expr;
+
+int main( void ){
+    // initialize tensor engine before using tensor operation, needed for CuBLAS
+    InitTensorEngine();
+    // allocate a 1000x1000 CPU matrix filled with 1.0 and take repeated matrix products
+    Tensor<cpu, 2> mat = NewTensor<cpu>( Shape2(1000,1000), 1.0f );
+    for (int i = 0; i < 100; i++)
+        mat = dot(mat, mat);
+    FreeSpace(mat);
+    // shutdown tensor engine after usage
+
+    ShutdownTensorEngine();
+    return 0;
+}
diff --git a/example/neuralnet/Makefile.openblas b/example/neuralnet/Makefile.openblas
new file mode 100644
index 000000000000..ef82c1115df7
--- /dev/null
+++ b/example/neuralnet/Makefile.openblas
@@ -0,0 +1,36 @@
+# set LD_LIBRARY_PATH
+# echo "Link mshadow with precompiled OpenBLAS"
+export OPENBLAS_ROOT=../../../OpenBLAS-v0.2.13-Win64-int32
+export CC = gcc
+export CXX = g++
+export NVCC = nvcc
+export CFLAGS = -Wall -O3 -msse3 -Wno-unknown-pragmas -funroll-loops -I../../ -I$(OPENBLAS_ROOT)/include -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_MKL=0 -DMSHADOW_USE_CBLAS=1 -D__APPLE__
+export LDFLAGS= -static -lpthread -lopenblas -L$(OPENBLAS_ROOT)/lib
+export NVCCFLAGS = -O3 --use_fast_math -ccbin $(CXX)
+
+# specify tensor path
+BIN = nnet convnet
+OBJ =
+CUOBJ =
+CUBIN =
+.PHONY: clean all
+
+all: $(BIN) $(OBJ) $(CUBIN) $(CUOBJ)
+
+nnet: nnet.cpp
+convnet: convnet.cpp
+
+$(BIN) :
+	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS)
+
+$(OBJ) :
+	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
+
+$(CUOBJ) :
+	$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $(filter %.cu, $^)
+
+$(CUBIN) :
+	$(NVCC) -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -Xlinker "$(LDFLAGS)" $(filter %.cu %.cpp %.o, $^)
+
+clean:
+	$(RM) $(OBJ) $(BIN) $(CUBIN) $(CUOBJ) *~
diff --git a/example/neuralnet/build_openblash.sh b/example/neuralnet/build_openblash.sh
new file mode 100644
index 000000000000..dd33f2cbc07c
--- /dev/null
+++ b/example/neuralnet/build_openblash.sh
@@ -0,0 +1,3 @@
+mv nnet.cu nnet.cpp
+mv convnet.cu convnet.cpp
+make -f Makefile.openblas
\ No newline at end of file
diff --git a/example/neuralnet/convnet.cu b/example/neuralnet/convnet.cu
index 448810e126f4..de8f65b5568b 100644
--- a/example/neuralnet/convnet.cu
+++ b/example/neuralnet/convnet.cu
@@ -202,7 +202,9 @@ int main( int argc, char *argv[] ){
     // choose which version to use
     INNet *net;
     if( !strcmp( argv[1], "gpu") ) {
+#if MSHADOW_USE_CUDA==1
         net = new ConvNet<gpu>( batch_size, insize, nchannel, ksize, kstride, psize, num_out );
+#endif
     }else{
         net = new ConvNet<cpu>( batch_size, insize, nchannel, ksize, kstride, psize, num_out );
     }
diff --git a/example/neuralnet/nnet.cu b/example/neuralnet/nnet.cu
index 75c623a68d7e..a1b4dc2f67f5 100644
--- a/example/neuralnet/nnet.cu
+++ b/example/neuralnet/nnet.cu
@@ -135,7 +135,9 @@ int main( int argc, char *argv[] ){
     // choose which version to use
     INNet *net;
     if( !strcmp( argv[1], "gpu") ) {
+#if MSHADOW_USE_CUDA==1
         net = new NNet<gpu>( batch_size, num_in, num_hidden, num_out );
+#endif
     }else{
         net = new NNet<cpu>( batch_size, num_in, num_hidden, num_out );
     }