This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Merge pull request #7 from dmlc/master
Merge Back
hetong007 committed Oct 29, 2015
2 parents b9363ba + 39d5f36 commit 8d0ef5d
Showing 163 changed files with 6,323 additions and 1,776 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
@@ -6,4 +6,4 @@
url = https://github.com/dmlc/dmlc-core.git
[submodule "ps-lite"]
path = ps-lite
url = https://github.com/dmlc/ps-lite.git
url = https://github.com/dmlc/ps-lite
23 changes: 17 additions & 6 deletions CMakeLists.txt
@@ -33,9 +33,6 @@ else(MSVC)
endif(MSVC)

if(USE_OPENCV)
if(MSVC)
set(OpenCV_STATIC OFF)
endif()
find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs)
if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found
find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc)
@@ -68,14 +65,26 @@ if(USE_CUDNN)
endif()

add_subdirectory("dmlc-core")
add_subdirectory("ps-lite")
if(NOT MSVC)
add_subdirectory("ps-lite")
endif()

mxnet_source_group("Source" GLOB_RECURSE "src/*.cc")
mxnet_source_group("Source\\Cuda" GLOB_RECURSE "src/*.cu")

FILE(GLOB_RECURSE SOURCE "src/*.cc")
FILE(GLOB_RECURSE cuda "src/*.cu")

if(MSVC)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endif()

if(USE_CUDA)
# define preprocessor macro so that we will not include the generated forcelink header
mshadow_cuda_compile(cuda_objs ${cuda})
@@ -93,8 +102,10 @@ endif()
add_library(mxnet SHARED ${SOURCE})
target_link_libraries(mxnet ${mshadow_LINKER_LIBS})
target_link_libraries(mxnet dmlccore)
target_link_libraries(mxnet pslite)
target_link_libraries(mxnet ${pslite_LINKER_LIBS})
if(NOT MSVC)
target_link_libraries(mxnet pslite)
target_link_libraries(mxnet ${pslite_LINKER_LIBS})
endif()
set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")

# ---[ Linter target
8 changes: 6 additions & 2 deletions CONTRIBUTORS.md
@@ -40,14 +40,18 @@ List of Contributors
* [Full List of Contributors](https://github.com/dmlc/mxnet/graphs/contributors)
- To contributors: please add your name to the list when you submit a patch to the project:)
* [Qiang Kou](https://github.com/thirdwing)
- KK is a R ninja, he will make mxnet available for R users.
- KK is an R ninja; he makes mxnet available for R users.
* [Tong He](https://github.com/hetong007)
- Tong is the major maintainer of MXNetR; he designs the mxnet R interface and wrote many of the R tutorials.
* [Feng Wang](https://github.com/happynear)
- Feng makes mxnet compatible with Windows Visual Studio.
* [Li Dong](https://github.com/donglixp)
* [Piji Li](https://github.com/lipiji)
* [Hu Shiwen](https://github.com/yajiedesign)
* [Boyuan Deng](https://github.com/bryandeng)
* [Tong He](https://github.com/hetong007)
* [Junran He](https://github.com/junranhe)
- Junran made the device kvstore allocation strategy smarter
* [Shuzhe Wu](https://github.com/II-Matto)
* [Xiaodong](https://github.com/XD-DENG)
* [Nan Xiao](https://github.com/road2stat)
* [Junyuan Xie](https://github.com/piiswrong)
16 changes: 10 additions & 6 deletions Makefile
@@ -34,7 +34,11 @@ else
endif
CFLAGS += -I./mshadow/ -I./dmlc-core/include -fPIC -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
NVCCFLAGS = --use_fast_math -g -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
ifeq ($(DEBUG), 1)
NVCCFLAGS = -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
else
NVCCFLAGS = --use_fast_math -g -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
endif
ROOTDIR = $(CURDIR)

ifndef LINT_LANG
@@ -80,7 +84,7 @@ PS_PATH=./ps-lite
DEPS_PATH=$(shell pwd)/deps
include $(PS_PATH)/make/ps.mk
ifeq ($(USE_DIST_KVSTORE), 1)
CFLAGS += -DMXNET_USE_DIST_KVSTORE -I$(PS_PATH)/src
CFLAGS += -DMXNET_USE_DIST_KVSTORE -I$(PS_PATH)/include -I$(DEPS_PATH)/include
LIB_DEP += $(PS_PATH)/build/libps.a
LDFLAGS += -Wl,-rpath,$(DEPS_PATH)/lib $(PS_LDFLAGS_SO)
endif
@@ -100,12 +104,12 @@ ifeq ($(USE_CUDA), 1)
ALL_DEP += $(CUOBJ)
endif

build/%.o: src/%.cc $(LIB_DEP)
build/%.o: src/%.cc
@mkdir -p $(@D)
$(CXX) -std=c++0x $(CFLAGS) -MM -MT build/$*.o $< >build/$*.d
$(CXX) -std=c++0x -c $(CFLAGS) -c $< -o $@

build/%_gpu.o: src/%.cu $(LIB_DEP)
build/%_gpu.o: src/%.cu
@mkdir -p $(@D)
$(NVCC) $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" -M -MT build/$*_gpu.o $< >build/$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $<
@@ -120,7 +124,7 @@ lib/libmxnet.so: $(ALL_DEP)

# ps-lite
$(PS_PATH)/build/libps.a:
$(MAKE) CXX=$(CXX) DEPS_PATH=$(DEPS_PATH) -C $(PS_PATH) deps
$(MAKE) CXX=$(CXX) DEPS_PATH=$(DEPS_PATH) -C $(PS_PATH) protobuf zmq
$(MAKE) CXX=$(CXX) DEPS_PATH=$(DEPS_PATH) -C $(PS_PATH) ps

$(DMLC_CORE)/libdmlc.a:
@@ -137,7 +141,7 @@ include tests/cpp/unittest.mk
test: $(TEST)

lint: rcpplint
python dmlc-core/scripts/lint.py mxnet ${LINT_LANG} include src scripts python
python dmlc-core/scripts/lint.py mxnet ${LINT_LANG} include src scripts python predict/python

doc: doxygen

2 changes: 2 additions & 0 deletions R-package/NAMESPACE
@@ -33,7 +33,9 @@ export(mx.io.extract)
export(mx.kv.create)
export(mx.metric.accuracy)
export(mx.metric.custom)
export(mx.metric.mae)
export(mx.metric.rmse)
export(mx.metric.rmsle)
export(mx.model.FeedForward.create)
export(mx.model.load)
export(mx.model.save)
2 changes: 1 addition & 1 deletion R-package/R/callback.R
@@ -2,7 +2,7 @@
#' @export
mx.callback.log.train.metric <- function(period) {
function(iteration, nbatch, env) {
if (nbatch %% period == 0) {
if (nbatch %% period == 0 && !is.null(env$metric)) {
result <- env$metric$get(env$train.metric)
cat(paste0("Batch [", nbatch, "] Train-", result$name, "=", result$value, "\n"))
}
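The is.null(env$metric) guard added above matters when training runs without an evaluation metric: env$metric is NULL in that case, and calling env$metric$get would fail. A minimal standalone sketch of the new behavior (the body is copied from this file so it runs outside the package; the plain list stands in for the real training environment):

    make.log.cb <- function(period) {
      function(iteration, nbatch, env) {
        # log only when a metric is attached and the batch count hits the period
        if (nbatch %% period == 0 && !is.null(env$metric)) {
          result <- env$metric$get(env$train.metric)
          cat(paste0("Batch [", nbatch, "] Train-", result$name, "=", result$value, "\n"))
        }
      }
    }
    cb <- make.log.cb(10)
    cb(1, 10, list(metric = NULL))  # no metric attached: skipped silently instead of erroring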
3 changes: 2 additions & 1 deletion R-package/R/io.R
@@ -21,7 +21,8 @@ mx.io.extract <- function(iter, field) {
padded <- iter$num.pad()
data <- dlist[[field]]
oshape <- dim(data)
packer$push(mx.nd.slice(data, 0, oshape[[1]] - padded))
ndim <- length(oshape)
packer$push(mx.nd.slice(data, 0, oshape[[ndim]] - padded))
}
iter$reset()
return(packer$get())
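This fix slices along the last dimension of the extracted array instead of the first; R arrays are column-major, so the batch dimension comes last and the padded examples sit at the end of that axis. A short sketch of the call, assuming the mxnet package is installed and that mx.nd.slice(nd, begin, end) slices the batch (last) axis as it is used above:

    library(mxnet)
    x <- mx.nd.array(array(1:12, dim = c(3, 4)))  # 4 samples, each a length-3 vector
    kept <- mx.nd.slice(x, 0, 3)                  # drop one padded sample: keep samples 0..2
    dim(as.array(kept))                           # c(3, 3)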
30 changes: 23 additions & 7 deletions R-package/R/metric.R
@@ -1,7 +1,7 @@
#' Helper function to create a customized metric
#'
#'
#' @export
mx.metric.custom <-function(name, feval) {
mx.metric.custom <- function(name, feval) {
init <- function() {
c(0, 0)
}
@@ -18,18 +18,34 @@ mx.metric.custom <-function(name, feval) {
return(ret)
}

#' Accuracy metric
#' Accuracy metric for classification
#'
#' @export
mx.metric.accuracy <- mx.metric.custom("accuracy", function(label, pred) {
ypred = max.col(pred, tie="first")
return(sum((label + 1) == ypred) / length(label))
ypred = max.col(t(as.array(pred)), tie="first")
return(sum((as.array(label) + 1) == ypred) / length(label))
})

#' RMSE metric
#'
#' RMSE (Root Mean Squared Error) metric for regression
#'
#' @export
mx.metric.rmse <- mx.metric.custom("rmse", function(label, pred) {
res <- sqrt(mean((label-pred)^2))
return(res)
})

#' MAE (Mean Absolute Error) metric for regression
#'
#' @export
mx.metric.mae <- mx.metric.custom("mae", function(label, pred) {
res <- mean(abs(label-pred))
return(res)
})

#' RMSLE (Root Mean Squared Logarithmic Error) metric for regression
#'
#' @export
mx.metric.rmsle <- mx.metric.custom("rmsle", function(label, pred) {
res <- sqrt(mean((log(pred + 1) - log(label + 1))^2))
return(res)
})
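All of the new metrics are built through mx.metric.custom, which wraps a feval(label, pred) function into a metric object. As a sketch of how a user can add a metric the same way (MAPE is a hypothetical example, not part of this commit):

    library(mxnet)
    mx.metric.mape <- mx.metric.custom("mape", function(label, pred) {
      # mean absolute percentage error; assumes labels are nonzero
      return(mean(abs((label - pred) / label)))
    })

The result can then be passed as eval.metric to mx.model.FeedForward.create, just like mx.metric.rmse.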
108 changes: 101 additions & 7 deletions R-package/R/model.R
@@ -1,12 +1,13 @@
# slice the shape on the highest dimension
mx.model.slice.shape <- function(shape, nsplit) {
batchsize <- shape[[1]]
ndim <- length(shape)
batchsize <- shape[[ndim]]
step <- as.integer((batchsize + nsplit - 1) / nsplit)
lapply(0:(nsplit - 1), function(k) {
begin = min(k * step, batchsize)
end = min((k + 1) * step, batchsize)
s <- shape
s[[1]] = end - begin
s[[ndim]] = end - begin
return(list(begin=begin, end=end, shape=s))
})
}
@@ -266,12 +267,75 @@ mx.model.init.iter <- function(X, y, batch.size, is.train) {
if (is.null(y)) {
if (is.train) stop("Need to provide parameter y for training with R arrays.")
shape <- dim(X)
y <- c(1:shape[[1]]) * 0
ndim <- length(shape)
y <- c(1:shape[[ndim]]) * 0
}
batch.size <- min(length(y), batch.size)
return(mx.io.arrayiter(X, y, batch.size=batch.size, shuffle=is.train))
}

# select layout by matching shape, report error if nothing matches up.
mx.model.select.layout.train <- function(X, y) {
if (is.null(y)) stop("Need to provide y for training")
y <- as.array(y)
dimX <- dim(X)
dimy <- dim(y)
if (length(dimX) != 2) return("colmajor")
rowmajor <- 0
colmajor <- 0
if (dimX[[1]] == dimy[[1]]) rowmajor <- 1
if (dimX[[length(dimX)]] == dimy[[length(dimy)]]) colmajor <- 1
if (rowmajor + colmajor != 1) {
stop("Cannot auto select array.layout, please specify this parameter")
}
if (rowmajor == 1) {
cat("Auto detect layout of input matrix, use rowmajor..\n")
return("rowmajor")
} else{
cat("Auto detect layout input matrix, use colmajor..\n")
return("colmajor")
}
}

# select layout by matching shape, report error if nothing matches up.
mx.model.select.layout.predict <- function(X, model) {
dimX <- dim(X)
if (length(dimX) != 2) return("colmajor")
rowmajor <- 1
colmajor <- 1
# try row major
ret <- mx.symbol.infer.shape(model$symbol, data=c(dimX[[2]], 1))
if (!is.null(ret)) {
names = names(model$arg.params)
for (i in 1:length(names)) {
if (any(ret$arg.shapes[[names[i]]] != dim(model$arg.params[[i]]))) {
rowmajor <- 0
}
}
}
# try col major
ret <- mx.symbol.infer.shape(model$symbol, data=c(dimX[[1]], 1))
if (!is.null(ret)) {
names = names(model$arg.params)
for (i in 1:length(names)) {
if (any(ret$arg.shapes[[names[i]]] != dim(model$arg.params[[i]]))) {
colmajor <- 0
}
}
}
if (rowmajor + colmajor != 1) {
stop("Cannot auto select array.layout, please specify this parameter")
}
if (rowmajor == 1) {
cat("Auto detect layout of input matrix, use rowmajor..\n")
return("rowmajor")
} else{
cat("Auto detect layout input matrix, use colmajor..\n")
return("colmajor")
}
}


#' Create a MXNet Feedforward neural net model with the specified training.
#'
#' @param symbol The symbolic configuration of the neural network.
@@ -297,6 +361,12 @@ mx.model.init.iter <- function(X, y, batch.size, is.train) {
#' The callback when one mini-batch iteration ends.
#' @param array.batch.size integer (default=128)
#' The batch size used for R array training.
#' @param array.layout can be "auto", "colmajor", "rowmajor" (default=auto)
#' The layout of the array. "rowmajor" is only supported for two-dimensional arrays.
#' For a matrix, "rowmajor" means dim(X) = c(nexample, nfeatures),
#' while "colmajor" means dim(X) = c(nfeatures, nexample).
#' "auto" detects the layout by matching the feature size;
#' it reports an error when X is a square matrix, asking the user to specify the layout explicitly.
#' @param kvstore string (default="local")
#' The parameter synchronization scheme in multiple devices.
#' @return model A trained mxnet model.
Expand All @@ -308,9 +378,17 @@ function(symbol, X, y=NULL, ctx=NULL,
initializer=mx.init.uniform(0.01),
eval.data=NULL, eval.metric=NULL,
iter.end.callback=NULL, epoch.end.callback=NULL,
array.batch.size=128,
array.batch.size=128, array.layout="auto",
kvstore="local",
...) {
if (is.array(X) || is.matrix(X)) {
if (array.layout == "auto") {
array.layout <- mx.model.select.layout.train(X, y)
}
if (array.layout == "rowmajor") {
X <- t(X)
}
}
X <- mx.model.init.iter(X, y, batch.size=array.batch.size, is.train=TRUE)
if (!X$iter.next()) {
X$reset()
Expand All @@ -324,7 +402,8 @@ function(symbol, X, y=NULL, ctx=NULL,
}
if (!is.list(ctx)) stop("ctx must be mx.context or list of mx.context")
if (is.character(optimizer)) {
batchsize = input.shape[[1]]
ndim <- length(input.shape)
batchsize = input.shape[[ndim]]
optimizer <- mx.opt.create(optimizer, rescale.grad=(1/batchsize), ...)
}

@@ -346,10 +425,24 @@
#' @param X The dataset to predict.
#' @param ctx mx.cpu() or mx.gpu(i) The device used to generate the prediction.
#' @param array.batch.size The batch size used in batching. Only used when X is R's array.
#' @param array.layout can be "auto", "colmajor", "rowmajor" (default=auto)
#' The layout of the array. "rowmajor" is only supported for two-dimensional arrays.
#' For a matrix, "rowmajor" means dim(X) = c(nexample, nfeatures),
#' while "colmajor" means dim(X) = c(nfeatures, nexample).
#' "auto" detects the layout by matching the feature size;
#' it reports an error when X is a square matrix, asking the user to specify the layout explicitly.
#'
#' @export
predict.MXFeedForwardModel <- function(model, X, ctx=NULL, array.batch.size=128) {
predict.MXFeedForwardModel <- function(model, X, ctx=NULL, array.batch.size=128, array.layout="auto") {
if (is.null(ctx)) ctx <- mx.ctx.default()
if (is.array(X) || is.matrix(X)) {
if (array.layout == "auto") {
array.layout <- mx.model.select.layout.predict(X, model)
}
if (array.layout == "rowmajor") {
X <- t(X)
}
}
X <- mx.model.init.iter(X, NULL, batch.size=array.batch.size, is.train=FALSE)
X$reset()
if (!X$iter.next()) stop("Cannot predict on empty iterator")
@@ -366,7 +459,8 @@ predict.MXFeedForwardModel <- function(model, X, ctx=NULL, array.batch.size=128)
out.pred <- mx.nd.copyto(pexec$ref.outputs[[1]], mx.cpu())
padded <- X$num.pad()
oshape <- dim(out.pred)
packer$push(mx.nd.slice(out.pred, 0, oshape[[1]] - padded))
ndim <- length(oshape)
packer$push(mx.nd.slice(out.pred, 0, oshape[[ndim]] - padded))
}
X$reset()
return(packer$get())
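The new array.layout parameter threads through both training and prediction above. A hedged usage sketch (the data, the num.round value, and the symbol construction are illustrative stand-ins; only the array.layout and predict signatures come from this diff):

    library(mxnet)
    # rows are examples, columns are features, so the layout is "rowmajor"
    train.x <- matrix(rnorm(200), nrow = 100, ncol = 2)
    train.y <- rnorm(100)
    data <- mx.symbol.Variable("data")
    fc <- mx.symbol.FullyConnected(data, num_hidden = 1)
    lro <- mx.symbol.LinearRegressionOutput(fc)
    model <- mx.model.FeedForward.create(lro, X = train.x, y = train.y,
                                         ctx = mx.cpu(), num.round = 5,
                                         array.layout = "rowmajor")
    pred <- predict(model, train.x, array.layout = "rowmajor")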