diff --git a/.gitignore b/.gitignore
index 2d1e5d842da4..749197668afc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,3 +108,10 @@ scala-package/*/*/target/
*.project
*.settings
!scala-package/*/bin
+*.bak
+*/node_modules/
+
+# Eclipse project config
+.project
+.cproject
+.pydevproject
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7613fe00375b..b81b1910c015 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,14 +10,17 @@ mxnet_option(USE_OPENMP "Build with Openmp support" ON)
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
mxnet_option(USE_CUDA "Build with CUDA support" ON)
mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
+mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH")
-
include(mshadow/cmake/mshadow.cmake)
include(mshadow/cmake/Utils.cmake)
include(mshadow/cmake/Cuda.cmake)
+set(mxnet_LINKER_LIBS "")
+list(APPEND mxnet_LINKER_LIBS ${mshadow_LINKER_LIBS})
+
include_directories("include")
include_directories("mshadow")
include_directories("dmlc-core/include")
@@ -29,7 +32,7 @@ if(MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
add_definitions(-DMXNET_EXPORTS)
set(CMAKE_C_FLAGS "/MP")
- set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS}")
+ set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
else(MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
@@ -44,7 +47,7 @@ if(USE_OPENCV)
find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc)
endif()
include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
- list(APPEND mshadow_LINKER_LIBS ${OpenCV_LIBS})
+ list(APPEND mxnet_LINKER_LIBS ${OpenCV_LIBS})
message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})")
add_definitions(-DMXNET_USE_OPENCV=1)
else(USE_OPENCV)
@@ -68,7 +71,7 @@ if(USE_CUDNN)
if(HAVE_CUDNN)
add_definitions(-DUSE_CUDNN)
include_directories(SYSTEM ${CUDNN_INCLUDE})
- list(APPEND mshadow_LINKER_LIBS ${CUDNN_LIBRARY})
+ list(APPEND mxnet_LINKER_LIBS ${CUDNN_LIBRARY})
add_definitions(-DMSHADOW_USE_CUDNN=1)
endif()
endif()
@@ -115,12 +118,22 @@ mxnet_source_group("Source\\resource.cc" GLOB "src/resource.cc/*.cc")
mxnet_source_group("Source\\storage" GLOB "src/storage/*.cc")
mxnet_source_group("Source\\symbol" GLOB "src/symbol/*.cc")
-
-
-
-
FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h")
-FILE(GLOB_RECURSE cuda "src/*.cu")
+FILE(GLOB_RECURSE CUDA "src/*.cu")
+
+if(USE_PLUGINS_WARPCTC)
+ set(WARPCTC_INCLUDE "" CACHE PATH "WARPCTC include")
+ set(WARPCTC_LIB "" CACHE FILEPATH "WARPCTC lib")
+ include_directories(SYSTEM ${WARPCTC_INCLUDE})
+ list(APPEND mxnet_LINKER_LIBS ${WARPCTC_LIB})
+ mxnet_source_group("Include\\plugin\\warpctc" GLOB "plugin/warpctc/*.h")
+ mxnet_source_group("Source\\plugin\\warpctc" GLOB "plugin/warpctc/*.cc")
+ mxnet_source_group("Cuda\\plugin\\warpctc" GLOB "plugin/warpctc/*.cu")
+ FILE(GLOB_RECURSE PLUGINS_SOURCE "plugin/warpctc/*.cc" "plugin/warpctc/*.h")
+ FILE(GLOB_RECURSE PLUGINS_CUSRC "plugin/warpctc/*.cu")
+ list(APPEND SOURCE ${PLUGINS_SOURCE})
+ list(APPEND CUDA ${PLUGINS_CUSRC})
+endif()
if (NOT (EXTRA_OPERATORS STREQUAL ""))
mxnet_source_group("Extra" GLOB_RECURSE "${EXTRA_OPERATORS}/*.cc")
@@ -142,19 +155,18 @@ endif()
if(USE_CUDA)
# define preprocessor macro so that we will not include the generated forcelink header
- mshadow_cuda_compile(cuda_objs ${cuda})
+ mshadow_cuda_compile(cuda_objs ${CUDA})
if(MSVC)
FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
- list(APPEND mshadow_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
+ list(APPEND mxnet_LINKER_LIBS ${CUDA_nvrtc_LIBRARY})
set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib")
- list(APPEND mshadow_LINKER_LIBS ${CUDA_cuda_LIBRARY})
+ list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
else(MSVC)
- list(APPEND mshadow_LINKER_LIBS nvrtc cuda)
+ list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
endif()
- list(APPEND SOURCE ${cuda_objs} ${cuda})
+ list(APPEND SOURCE ${cuda_objs} ${CUDA})
endif()
-
if(NOT MSVC)
# Only add c++11 flags and definitions after cuda compiling
add_definitions(-DDMLC_USE_CXX11)
@@ -170,10 +182,9 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
else()
add_library(mxnet SHARED ${SOURCE})
endif()
-target_link_libraries(mxnet ${mshadow_LINKER_LIBS})
+target_link_libraries(mxnet ${mxnet_LINKER_LIBS})
target_link_libraries(mxnet dmlccore)
-
if(MSVC)
set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")
endif()
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index a2578ea469a0..ee6fbcf057d3 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -107,3 +107,6 @@ List of Contributors
* [Yuqi Li](https://github.com/ziyeqinghan)
* [Depeng Liang](https://github.com/Ldpe2G)
* [Kiko Qiu](https://github.com/kikoqiu)
+* [Yang Bo](https://github.com/Atry)
+* [Jonas Amaro](https://github.com/jonasrla)
+* [Yan Li](https://github.com/Godricly)
diff --git a/Makefile b/Makefile
index 1ef81159ad07..ccea1ba0a7b0 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,6 @@ endif
include $(config)
include mshadow/make/mshadow.mk
include $(DMLC_CORE)/make/dmlc.mk
-unexport NO_OPENMP
# all tge possible warning tread
WARNFLAGS= -Wall
@@ -37,9 +36,9 @@ endif
CFLAGS += -I$(ROOTDIR)/mshadow/ -I$(ROOTDIR)/dmlc-core/include -fPIC -Iinclude $(MSHADOW_CFLAGS)
LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
ifeq ($(DEBUG), 1)
- NVCCFLAGS = -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
+ NVCCFLAGS = -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
else
- NVCCFLAGS = -D_FORCE_INLINES -g -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
+ NVCCFLAGS = -std=c++11 -Xcompiler -D_FORCE_INLINES -g -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
endif
ifndef LINT_LANG
@@ -150,8 +149,8 @@ endif
build/src/%.o: src/%.cc
@mkdir -p $(@D)
- $(CXX) -std=c++0x $(CFLAGS) -MM -MT build/src/$*.o $< >build/src/$*.d
- $(CXX) -std=c++0x -c $(CFLAGS) -c $< -o $@
+ $(CXX) -std=c++11 $(CFLAGS) -MM -MT build/src/$*.o $< >build/src/$*.d
+ $(CXX) -std=c++11 -c $(CFLAGS) -c $< -o $@
build/src/%_gpu.o: src/%.cu
@mkdir -p $(@D)
@@ -160,20 +159,20 @@ build/src/%_gpu.o: src/%.cu
build/plugin/%.o: plugin/%.cc
@mkdir -p $(@D)
- $(CXX) -std=c++0x $(CFLAGS) -MM -MT build/plugin/$*.o $< >build/plugin/$*.d
- $(CXX) -std=c++0x -c $(CFLAGS) -c $< -o $@
+ $(CXX) -std=c++11 $(CFLAGS) -MM -MT build/plugin/$*.o $< >build/plugin/$*.d
+ $(CXX) -std=c++11 -c $(CFLAGS) -c $< -o $@
# A nvcc bug cause it to generate "generic/xxx.h" dependencies from torch headers.
# Use CXX to generate dependency instead.
build/plugin/%_gpu.o: plugin/%.cu
@mkdir -p $(@D)
- $(CXX) -std=c++0x $(CFLAGS) -MM -MT build/plugin/$*_gpu.o $< >build/plugin/$*_gpu.d
+ $(CXX) -std=c++11 $(CFLAGS) -MM -MT build/plugin/$*_gpu.o $< >build/plugin/$*_gpu.d
$(NVCC) -c -o $@ $(NVCCFLAGS) -Xcompiler "$(CFLAGS)" $<
$(EXTRA_OPERATORS)/build/%.o: $(EXTRA_OPERATORS)/%.cc
@mkdir -p $(@D)
- $(CXX) -std=c++0x $(CFLAGS) -Isrc/operator -MM -MT $(EXTRA_OPERATORS)/build/$*.o $< >$(EXTRA_OPERATORS)/build/$*.d
- $(CXX) -std=c++0x -c $(CFLAGS) -Isrc/operator -c $< -o $@
+ $(CXX) -std=c++11 $(CFLAGS) -Isrc/operator -MM -MT $(EXTRA_OPERATORS)/build/$*.o $< >$(EXTRA_OPERATORS)/build/$*.d
+ $(CXX) -std=c++11 -c $(CFLAGS) -Isrc/operator -c $< -o $@
$(EXTRA_OPERATORS)/build/%_gpu.o: $(EXTRA_OPERATORS)/%.cu
@mkdir -p $(@D)
@@ -201,7 +200,7 @@ bin/im2rec: tools/im2rec.cc $(ALL_DEP)
$(BIN) :
@mkdir -p $(@D)
- $(CXX) $(CFLAGS) -std=c++0x -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
+ $(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.a %.cc, $^) $(LDFLAGS)
include tests/cpp/unittest.mk
@@ -237,26 +236,26 @@ rpkg: roxygen
scalapkg:
(cd $(ROOTDIR)/scala-package; \
mvn clean package -P$(SCALA_PKG_PROFILE) -Dcxx="$(CXX)" \
- -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
- -Dlddeps="$(LIB_DEP)")
+ -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
+ -Dlddeps="$(LIB_DEP)")
scalatest:
(cd $(ROOTDIR)/scala-package; \
mvn verify -P$(SCALA_PKG_PROFILE) -Dcxx="$(CXX)" \
- -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
- -Dlddeps="$(LIB_DEP)" $(SCALA_TEST_ARGS))
+ -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
+ -Dlddeps="$(LIB_DEP)" $(SCALA_TEST_ARGS))
scalainstall:
(cd $(ROOTDIR)/scala-package; \
mvn install -P$(SCALA_PKG_PROFILE) -DskipTests -Dcxx="$(CXX)" \
- -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
- -Dlddeps="$(LIB_DEP)")
+ -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
+ -Dlddeps="$(LIB_DEP)")
scaladeploy:
(cd $(ROOTDIR)/scala-package; \
mvn deploy -Prelease,$(SCALA_PKG_PROFILE) -DskipTests -Dcxx="$(CXX)" \
- -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
- -Dlddeps="$(LIB_DEP)")
+ -Dcflags="$(CFLAGS)" -Dldflags="$(LDFLAGS)" \
+ -Dlddeps="$(LIB_DEP)")
jnilint:
python2 dmlc-core/scripts/lint.py mxnet-jnicpp cpp scala-package/native/src
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 355233385cbf..ad95fe050ef3 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -26,6 +26,9 @@ export(mx.exec.update.arg.arrays)
export(mx.exec.update.aux.arrays)
export(mx.exec.update.grad.arrays)
export(mx.gpu)
+export(mx.gru)
+export(mx.gru.forward)
+export(mx.gru.inference)
export(mx.init.Xavier)
export(mx.init.create)
export(mx.init.normal)
@@ -80,6 +83,9 @@ export(mx.nd.transpose)
export(mx.nd.zeros)
export(mx.opt.create)
export(mx.opt.get.updater)
+export(mx.rnn)
+export(mx.rnn.forward)
+export(mx.rnn.inference)
export(mx.rnorm)
export(mx.runif)
export(mx.set.seed)
diff --git a/R-package/R/gru.R b/R-package/R/gru.R
new file mode 100644
index 000000000000..d2ffd9a414c2
--- /dev/null
+++ b/R-package/R/gru.R
@@ -0,0 +1,355 @@
+# gru cell symbol
+gru <- function(num.hidden, indata, prev.state, param, seqidx, layeridx, dropout=0) {
+ if (dropout > 0)
+ indata <- mx.symbol.Dropout(data=indata, p=dropout)
+ i2h <- mx.symbol.FullyConnected(data=indata,
+ weight=param$gates.i2h.weight,
+ bias=param$gates.i2h.bias,
+ num.hidden=num.hidden * 2,
+ name=paste0("t", seqidx, ".l", layeridx, ".gates.i2h"))
+ h2h <- mx.symbol.FullyConnected(data=prev.state$h,
+ weight=param$gates.h2h.weight,
+ bias=param$gates.h2h.bias,
+ num.hidden=num.hidden * 2,
+ name=paste0("t", seqidx, ".l", layeridx, ".gates.h2h"))
+ gates <- i2h + h2h
+ slice.gates <- mx.symbol.SliceChannel(gates, num.outputs=2,
+ name=paste0("t", seqidx, ".l", layeridx, ".slice"))
+ update.gate <- mx.symbol.Activation(slice.gates[[1]], act.type="sigmoid")
+ reset.gate <- mx.symbol.Activation(slice.gates[[2]], act.type="sigmoid")
+
+ htrans.i2h <- mx.symbol.FullyConnected(data=indata,
+ weight=param$trans.i2h.weight,
+ bias=param$trans.i2h.bias,
+ num.hidden=num.hidden,
+ name=paste0("t", seqidx, ".l", layeridx, ".trans.i2h"))
+ h.after.reset <- prev.state$h * reset.gate
+ htrans.h2h <- mx.symbol.FullyConnected(data=h.after.reset,
+ weight=param$trans.h2h.weight,
+ bias=param$trans.h2h.bias,
+ num.hidden=num.hidden,
+ name=paste0("t", seqidx, ".l", layeridx, ".trans.h2h"))
+ h.trans <- htrans.i2h + htrans.h2h
+ h.trans.active <- mx.symbol.Activation(h.trans, act.type="tanh")
+ next.h <- prev.state$h + update.gate * (h.trans.active - prev.state$h)
+ return (list(h=next.h))
+}
+
+# unrolled gru network
+gru.unroll <- function(num.gru.layer, seq.len, input.size,
+ num.hidden, num.embed, num.label, dropout=0) {
+ embed.weight <- mx.symbol.Variable("embed.weight")
+ cls.weight <- mx.symbol.Variable("cls.weight")
+ cls.bias <- mx.symbol.Variable("cls.bias")
+ param.cells <- lapply(1:num.gru.layer, function(i) {
+ cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")),
+ gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")),
+ gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")),
+ gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")),
+ trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")),
+ trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")),
+ trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")),
+ trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias")))
+ return (cell)
+ })
+ last.states <- lapply(1:num.gru.layer, function(i) {
+ state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
+ return (state)
+ })
+
+  # embedding layer
+ label <- mx.symbol.Variable("label")
+ data <- mx.symbol.Variable("data")
+ embed <- mx.symbol.Embedding(data=data, input.dim=input.size,
+ weight=embed.weight, output.dim=num.embed, name='embed')
+ wordvec <- mx.symbol.SliceChannel(data=embed, num.outputs=seq.len, squeeze.axis=1)
+
+ last.hidden <- list()
+ for (seqidx in 1:seq.len) {
+ hidden <- wordvec[[seqidx]]
+ # stack GRU
+ for (i in 1:num.gru.layer) {
+ dp <- ifelse(i==1, 0, dropout)
+ next.state <- gru(num.hidden, indata=hidden,
+ prev.state=last.states[[i]],
+ param=param.cells[[i]],
+ seqidx=seqidx, layeridx=i,
+ dropout=dp)
+ hidden <- next.state$h
+ last.states[[i]] <- next.state
+ }
+ # decoder
+ if (dropout > 0)
+ hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
+ last.hidden <- c(last.hidden, hidden)
+ }
+ last.hidden$dim <- 0
+ last.hidden$num.args <- seq.len
+ concat <-mxnet:::mx.varg.symbol.Concat(last.hidden)
+ fc <- mx.symbol.FullyConnected(data=concat,
+ weight=cls.weight,
+ bias=cls.bias,
+ num.hidden=num.label)
+
+ label <- mx.symbol.transpose(data=label)
+ label <- mx.symbol.Reshape(data=label, target.shape=c(0))
+
+ loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm")
+ return (loss.all)
+}
+
+# gru inference model symbol
+gru.inference.symbol <- function(num.gru.layer, seq.len, input.size,
+ num.hidden, num.embed, num.label, dropout=0) {
+ seqidx <- 1
+ embed.weight <- mx.symbol.Variable("embed.weight")
+ cls.weight <- mx.symbol.Variable("cls.weight")
+ cls.bias <- mx.symbol.Variable("cls.bias")
+
+ param.cells <- lapply(1:num.gru.layer, function(i) {
+ cell <- list(gates.i2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.i2h.weight")),
+ gates.i2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.i2h.bias")),
+ gates.h2h.weight = mx.symbol.Variable(paste0("l", i, ".gates.h2h.weight")),
+ gates.h2h.bias = mx.symbol.Variable(paste0("l", i, ".gates.h2h.bias")),
+ trans.i2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.i2h.weight")),
+ trans.i2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.i2h.bias")),
+ trans.h2h.weight = mx.symbol.Variable(paste0("l", i, ".trans.h2h.weight")),
+ trans.h2h.bias = mx.symbol.Variable(paste0("l", i, ".trans.h2h.bias")))
+ return (cell)
+ })
+ last.states <- lapply(1:num.gru.layer, function(i) {
+ state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
+ return (state)
+ })
+
+  # embedding layer
+ data <- mx.symbol.Variable("data")
+ hidden <- mx.symbol.Embedding(data=data, input_dim=input.size,
+ weight=embed.weight, output_dim=num.embed, name="embed")
+
+ # stack GRU
+ for (i in 1:num.gru.layer) {
+ dp <- ifelse(i==1, 0, dropout)
+ next.state <- gru(num.hidden, indata=hidden,
+ prev.state=last.states[[i]],
+ param=param.cells[[i]],
+ seqidx=seqidx, layeridx=i,
+ dropout=dp)
+ hidden <- next.state$h
+ last.states[[i]] <- next.state
+ }
+ # decoder
+ if (dropout > 0)
+ hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
+
+ fc <- mx.symbol.FullyConnected(data=hidden, num_hidden=num.label,
+ weight=cls.weight, bias=cls.bias, name='pred')
+ sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm')
+ unpack.h <- lapply(1:num.gru.layer, function(i) {
+ state <- last.states[[i]]
+ state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h"))
+ return (state.h)
+ })
+
+ list.all <- c(sm, unpack.h)
+ return (mx.symbol.Group(list.all))
+}
+
+#' Training GRU Unrolled Model
+#'
+#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array)
+#' The Training set.
+#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional
+#' The validation set used for validation evaluation during the progress.
+#' @param num.gru.layer integer
+#' The number of the layer of gru.
+#' @param seq.len integer
+#' The length of the input sequence.
+#' @param num.hidden integer
+#' The number of hidden nodes.
+#' @param num.embed integer
+#' The output dim of embedding.
+#' @param num.label integer
+#' The number of labels.
+#' @param batch.size integer
+#' The batch size used for R array training.
+#' @param input.size integer
+#' The input dim of one-hot encoding of embedding
+#' @param ctx mx.context, optional
+#' The device used to perform training.
+#' @param num.round integer, default=10
+#' The number of iterations over training data to train the model.
+#' @param update.period integer, default=1
+#' The number of iterations to update parameters during training period.
+#' @param initializer initializer object. default=mx.init.uniform(0.01)
+#' The initialization scheme for parameters.
+#' @param dropout float, default=0
+#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
+#' @param optimizer string, default="sgd"
+#' The optimization method.
+#' @param ... other parameters passed to \code{mx.gru}.
+#' @return model A trained gru unrolled model.
+#'
+#' @export
+mx.gru <- function( train.data, eval.data=NULL,
+ num.gru.layer, seq.len,
+ num.hidden, num.embed, num.label,
+ batch.size, input.size,
+ ctx=mx.ctx.default(),
+ num.round=10, update.period=1,
+ initializer=mx.init.uniform(0.01),
+ dropout=0, optimizer='sgd',
+ ...) {
+ # check data and change data into iterator
+ train.data <- check.data(train.data, batch.size, TRUE)
+ eval.data <- check.data(eval.data, batch.size, FALSE)
+
+ # get unrolled gru symbol
+ rnn.sym <- gru.unroll( num.gru.layer=num.gru.layer,
+ num.hidden=num.hidden,
+ seq.len=seq.len,
+ input.size=input.size,
+ num.embed=num.embed,
+ num.label=num.label,
+ dropout=dropout)
+
+ init.states.name <- lapply(1:num.gru.layer, function(i) {
+ state.h <- paste0("l", i, ".init.h")
+ return (state.h)
+ })
+
+ # set up gru model
+ model <- setup.rnn.model(rnn.sym=rnn.sym,
+ ctx=ctx,
+ num.rnn.layer=num.gru.layer,
+ seq.len=seq.len,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ batch.size=batch.size,
+ input.size=input.size,
+ init.states.name=init.states.name,
+ initializer=initializer,
+ dropout=dropout)
+
+ # train gru model
+ model <- train.rnn( model, train.data, eval.data,
+ num.round=num.round,
+ update.period=update.period,
+ ctx=ctx,
+ init.states.name=init.states.name,
+ ...)
+ # change model into MXFeedForwardModel
+ model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays)
+ return(structure(model, class="MXFeedForwardModel"))
+}
+
+#' Create a GRU Inference Model
+#'
+#' @param num.gru.layer integer
+#' The number of the layer of gru.
+#' @param input.size integer
+#' The input dim of one-hot encoding of embedding
+#' @param num.hidden integer
+#' The number of hidden nodes.
+#' @param num.embed integer
+#' The output dim of embedding.
+#' @param num.label integer
+#' The number of labels.
+#' @param batch.size integer, default=1
+#' The batch size used for R array training.
+#' @param arg.params list
+#'     Model parameter, list of name to NDArray of net's weights.
+#' @param ctx mx.context, optional
+#'     The device used to perform training.
+#' @param dropout float, default=0
+#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
+#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+#' A gru inference model.
+#'
+#' @export
+mx.gru.inference <- function(num.gru.layer,
+ input.size,
+ num.hidden,
+ num.embed,
+ num.label,
+ batch.size=1,
+ arg.params,
+ ctx=mx.cpu(),
+ dropout=0.) {
+ sym <- gru.inference.symbol(num.gru.layer=num.gru.layer,
+ input.size=input.size,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ dropout=dropout)
+
+ init.states.name <- lapply(1:num.gru.layer, function(i) {
+ state.h <- paste0("l", i, ".init.h")
+ return (state.h)
+ })
+
+ seq.len <- 1
+ # set up gru model
+ model <- setup.rnn.model(rnn.sym=sym,
+ ctx=ctx,
+ num.rnn.layer=num.gru.layer,
+ seq.len=seq.len,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ batch.size=batch.size,
+ input.size=input.size,
+ init.states.name=init.states.name,
+ initializer=mx.init.uniform(0.01),
+ dropout=dropout)
+ arg.names <- names(model$rnn.exec$ref.arg.arrays)
+ for (k in names(arg.params)) {
+ if ((k %in% arg.names) && is.param.name(k) ) {
+ rnn.input <- list()
+ rnn.input[[k]] <- arg.params[[k]]
+ mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE)
+ }
+ }
+ init.states <- list()
+ for (i in 1:num.gru.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+
+ return (model)
+}
+
+#' Using forward function to predict in gru inference model
+#'
+#' @param model gru model
+#' A gru inference model
+#' @param input.data array.matrix
+#' The input data for forward function
+#' @param new.seq boolean, default=FALSE
+#' Whether the input is the start of a new sequence
+#'
+#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model.
+#'
+#' @export
+mx.gru.forward <- function(model, input.data, new.seq=FALSE) {
+ if (new.seq == TRUE) {
+ init.states <- list()
+ for (i in 1:model$num.rnn.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+ }
+ dim(input.data) <- c(model$batch.size)
+ data <- list(data=mx.nd.array(input.data))
+ mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE)
+ mx.exec.forward(model$rnn.exec, is.train=FALSE)
+ init.states <- list()
+ for (i in 1:model$num.rnn.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]]
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+ prob <- model$rnn.exec$ref.outputs[["sm_output"]]
+ return (list(prob=prob, model=model))
+}
+
diff --git a/R-package/R/lstm.R b/R-package/R/lstm.R
index 3fcd0e831751..27c1c2e96eac 100644
--- a/R-package/R/lstm.R
+++ b/R-package/R/lstm.R
@@ -56,9 +56,7 @@ lstm.unroll <- function(num.lstm.layer, seq.len, input.size,
last.hidden <- list()
for (seqidx in 1:seq.len) {
-
- hidden = wordvec[[seqidx]]
-
+ hidden <- wordvec[[seqidx]]
# stack lstm
for (i in 1:num.lstm.layer) {
dp <- ifelse(i==1, 0, dropout)
@@ -90,6 +88,7 @@ lstm.unroll <- function(num.lstm.layer, seq.len, input.size,
return (loss.all)
}
+# lstm inference model symbol
lstm.inference.symbol <- function(num.lstm.layer, input.size,
num.hidden, num.embed, num.label, dropout=0.) {
seqidx <- 0
@@ -99,9 +98,9 @@ lstm.inference.symbol <- function(num.lstm.layer, input.size,
param.cells <- lapply(1:num.lstm.layer, function(i) {
cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")),
- i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
- h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
- h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
+ i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
+ h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
+ h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
return (cell)
})
last.states <- lapply(1:num.lstm.layer, function(i) {
@@ -148,250 +147,7 @@ lstm.inference.symbol <- function(num.lstm.layer, input.size,
return (mx.symbol.Group(list.all))
}
-is.param.name <- function(name) {
- return (grepl('weight$', name) || grepl('bias$', name) ||
- grepl('gamma$', name) || grepl('beta$', name) )
-}
-
-# Initialize parameters
-mx.model.init.params.rnn <- function(symbol, input.shape, initializer, ctx) {
- if (!is.mx.symbol(symbol)) stop("symbol need to be MXSymbol")
- slist <- symbol$infer.shape(input.shape)
- if (is.null(slist)) stop("Not enough information to get shapes")
- arg.params <- mx.init.create(initializer, slist$arg.shapes, ctx, skip.unknown=TRUE)
- aux.params <- mx.init.create(initializer, slist$aux.shapes, ctx, skip.unknown=FALSE)
- return(list(arg.params=arg.params, aux.params=aux.params))
-}
-
-# Initialize the data iter
-mx.model.init.iter.rnn <- function(X, y, batch.size, is.train) {
- if (is.MXDataIter(X)) return(X)
- shape <- dim(data)
- if (is.null(shape)) {
- num.data <- length(X)
- } else {
- ndim <- length(shape)
- num.data <- shape[[ndim]]
- }
- if (is.null(y)) {
- if (is.train) stop("Need to provide parameter y for training with R arrays.")
- y <- c(1:num.data) * 0
- }
-
- batch.size <- min(num.data, batch.size)
-
- return(mx.io.arrayiter(X, y, batch.size=batch.size, shuffle=is.train))
-}
-
-# set up rnn model with lstm cells
-setup.rnn.model <- function(rnn.sym, ctx,
- num.lstm.layer, seq.len,
- num.hidden, num.embed, num.label,
- batch.size, input.size,
- initializer=mx.init.uniform(0.01),
- dropout=0) {
-
- arg.names <- rnn.sym$arguments
- input.shapes <- list()
- for (name in arg.names) {
- if (grepl('init.c$', name) || grepl('init.h$', name)) {
- input.shapes[[name]] <- c(num.hidden, batch.size)
- }
- else if (grepl('data$', name) || grepl('label$', name) ) {
- if (seq.len == 1) {
- input.shapes[[name]] <- c(batch.size)
- } else {
- input.shapes[[name]] <- c(seq.len, batch.size)
- }
- }
- }
- params <- mx.model.init.params.rnn(rnn.sym, input.shapes, initializer, mx.cpu())
- args <- input.shapes
- args$symbol <- rnn.sym
- args$ctx <- ctx
- args$grad.req <- "add"
- rnn.exec <- do.call(mx.simple.bind, args)
-
- mx.exec.update.arg.arrays(rnn.exec, params$arg.params, match.name=TRUE)
- mx.exec.update.aux.arrays(rnn.exec, params$aux.params, match.name=TRUE)
-
- grad.arrays <- list()
- for (name in names(rnn.exec$ref.grad.arrays)) {
- if (is.param.name(name))
- grad.arrays[[name]] <- rnn.exec$ref.arg.arrays[[name]]*0
- }
- mx.exec.update.grad.arrays(rnn.exec, grad.arrays, match.name=TRUE)
-
- return (list(rnn.exec=rnn.exec, symbol=rnn.sym,
- num.lstm.layer=num.lstm.layer, num.hidden=num.hidden,
- seq.len=seq.len, batch.size=batch.size,
- num.embed=num.embed))
-
-}
-
-
-calc.nll <- function(seq.label.probs, batch.size) {
- nll = - sum(log(seq.label.probs)) / batch.size
- return (nll)
-}
-
-get.label <- function(label, ctx) {
- label <- as.array(label)
- seq.len <- dim(label)[[1]]
- batch.size <- dim(label)[[2]]
- sm.label <- array(0, dim=c(seq.len*batch.size))
- for (seqidx in 1:seq.len) {
- sm.label[((seqidx-1)*batch.size+1) : (seqidx*batch.size)] <- label[seqidx,]
- }
- return (mx.nd.array(sm.label, ctx))
-}
-
-
-
-train.lstm <- function(model, train.data, eval.data,
- num.round, update.period,
- optimizer='sgd', ctx=mx.ctx.default(), ...) {
- m <- model
- seq.len <- m$seq.len
- batch.size <- m$batch.size
- num.lstm.layer <- m$num.lstm.layer
- num.hidden <- m$num.hidden
-
- opt <- mx.opt.create(optimizer, rescale.grad=(1/batch.size), ...)
-
- updater <- mx.opt.get.updater(opt, m$rnn.exec$ref.arg.arrays)
- epoch.counter <- 0
- log.period <- max(as.integer(1000 / seq.len), 1)
- last.perp <- 10000000.0
-
- for (iteration in 1:num.round) {
- nbatch <- 0
- train.nll <- 0
- # reset states
- init.states <- list()
- for (i in 1:num.lstm.layer) {
- init.states[[paste0("l", i, ".init.c")]] <- mx.nd.zeros(c(num.hidden, batch.size))
- init.states[[paste0("l", i, ".init.h")]] <- mx.nd.zeros(c(num.hidden, batch.size))
- }
- mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
-
- tic <- Sys.time()
-
- train.data$reset()
-
- while (train.data$iter.next()) {
- # set rnn input
- rnn.input <- train.data$value()
- mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE)
-
- mx.exec.forward(m$rnn.exec, is.train=TRUE)
- seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx))
-
- mx.exec.backward(m$rnn.exec)
- init.states <- list()
- for (i in 1:num.lstm.layer) {
- init.states[[paste0("l", i, ".init.c")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
- init.states[[paste0("l", i, ".init.h")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
- }
- mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
- # update epoch counter
- epoch.counter <- epoch.counter + 1
- if (epoch.counter %% update.period == 0) {
- # the gradient of initial c and inital h should be zero
- init.grad <- list()
- for (i in 1:num.lstm.layer) {
- init.grad[[paste0("l", i, ".init.c")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
- init.grad[[paste0("l", i, ".init.h")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
- }
- mx.exec.update.grad.arrays(m$rnn.exec, init.grad, match.name=TRUE)
-
- arg.blocks <- updater(m$rnn.exec$ref.arg.arrays, m$rnn.exec$ref.grad.arrays)
-
- mx.exec.update.arg.arrays(m$rnn.exec, arg.blocks, skip.null=TRUE)
-
- grad.arrays <- list()
- for (name in names(m$rnn.exec$ref.grad.arrays)) {
- if (is.param.name(name))
- grad.arrays[[name]] <- m$rnn.exec$ref.grad.arrays[[name]]*0
- }
- mx.exec.update.grad.arrays(m$rnn.exec, grad.arrays, match.name=TRUE)
-
- }
-
- train.nll <- train.nll + calc.nll(as.array(seq.label.probs), batch.size)
-
- nbatch <- nbatch + seq.len
- if ((epoch.counter %% log.period) == 0) {
- cat(paste0("Epoch [", epoch.counter,
- "] Train: NLL=", train.nll / nbatch,
- ", Perp=", exp(train.nll / nbatch), "\n"))
- }
- }
- train.data$reset()
- # end of training loop
- toc <- Sys.time()
- cat(paste0("Iter [", iteration,
- "] Train: Time: ", as.numeric(toc - tic, units="secs"),
- " sec, NLL=", train.nll / nbatch,
- ", Perp=", exp(train.nll / nbatch), "\n"))
-
- if (!is.null(eval.data)) {
- val.nll <- 0.0
- # validation set, reset states
- init.states <- list()
- for (i in 1:num.lstm.layer) {
- init.states[[paste0("l", i, ".init.c")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
- init.states[[paste0("l", i, ".init.h")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
- }
- mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
-
- eval.data$reset()
- nbatch <- 0
- while (eval.data$iter.next()) {
- # set rnn input
- rnn.input <- eval.data$value()
- mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE)
- mx.exec.forward(m$rnn.exec, is.train=FALSE)
- # probability of each label class, used to evaluate nll
- seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx))
- # transfer the states
- init.states <- list()
- for (i in 1:num.lstm.layer) {
- init.states[[paste0("l", i, ".init.c")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
- init.states[[paste0("l", i, ".init.h")]] <- m$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
- }
- mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
- val.nll <- val.nll + calc.nll(as.array(seq.label.probs), batch.size)
- nbatch <- nbatch + seq.len
- }
- eval.data$reset()
- perp <- exp(val.nll / nbatch)
- cat(paste0("Iter [", iteration,
- "] Val: NLL=", val.nll / nbatch,
- ", Perp=", exp(val.nll / nbatch), "\n"))
- }
- }
-
- return (m)
-}
-
-check.data <- function(data, batch.size, is.train) {
- if (!is.null(data) && !is.list(data) && !is.mx.dataiter(data)) {
- stop("The dataset should be either a mx.io.DataIter or a R list")
- }
- if (is.list(data)) {
- if (is.null(data$data) || is.null(data$label)){
- stop("Please provide dataset as list(data=R.array, label=R.array)")
- }
- data <- mx.model.init.iter.rnn(data$data, data$label, batch.size=batch.size, is.train = is.train)
- }
- if (!is.null(data) && !data$iter.next()) {
- data$reset()
- if (!data$iter.next()) stop("Empty input")
- }
- return (data)
-}
#' Training LSTM Unrolled Model
#'
@@ -450,24 +206,36 @@ mx.lstm <- function(train.data, eval.data=NULL,
num.embed=num.embed,
num.label=num.label,
dropout=dropout)
+ init.states.c <- lapply(1:num.lstm.layer, function(i) {
+ state.c <- paste0("l", i, ".init.c")
+ return (state.c)
+ })
+ init.states.h <- lapply(1:num.lstm.layer, function(i) {
+ state.h <- paste0("l", i, ".init.h")
+ return (state.h)
+ })
+ init.states.name <- c(init.states.c, init.states.h)
+
# set up lstm model
model <- setup.rnn.model(rnn.sym=rnn.sym,
ctx=ctx,
- num.lstm.layer=num.lstm.layer,
+ num.rnn.layer=num.lstm.layer,
seq.len=seq.len,
num.hidden=num.hidden,
num.embed=num.embed,
num.label=num.label,
batch.size=batch.size,
input.size=input.size,
+ init.states.name=init.states.name,
initializer=initializer,
dropout=dropout)
# train lstm model
- model <- train.lstm(model, train.data, eval.data,
+ model <- train.rnn( model, train.data, eval.data,
num.round=num.round,
update.period=update.period,
ctx=ctx,
+ init.states.name=init.states.name,
...)
# change model into MXFeedForwardModel
model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays)
@@ -487,7 +255,7 @@ mx.lstm <- function(train.data, eval.data=NULL,
#' The output dim of embedding.
#' @param num.label integer
#' The number of labels.
-#' @param batch.size integer
+#' @param batch.size integer, default=1
#' The batch size used for R array training.
#' @param arg.params list
#' The batch size used for R array training.
@@ -495,7 +263,8 @@ mx.lstm <- function(train.data, eval.data=NULL,
#' Model parameter, list of name to NDArray of net's weights.
#' @param dropout float, default=0
#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
-#' @return model a lstm inference model.
+#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+#' A lstm inference model.
#'
#' @export
mx.lstm.inference <- function(num.lstm.layer,
@@ -507,24 +276,35 @@ mx.lstm.inference <- function(num.lstm.layer,
arg.params,
ctx=mx.cpu(),
dropout=0.) {
- sym <- lstm.inference.symbol(num.lstm.layer,
- input.size,
- num.hidden,
- num.embed,
- num.label,
- dropout)
+ sym <- lstm.inference.symbol(num.lstm.layer=num.lstm.layer,
+ input.size=input.size,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ dropout=dropout)
+
+ init.states.c <- lapply(1:num.lstm.layer, function(i) {
+ state.c <- paste0("l", i, ".init.c")
+ return (state.c)
+ })
+ init.states.h <- lapply(1:num.lstm.layer, function(i) {
+ state.h <- paste0("l", i, ".init.h")
+ return (state.h)
+ })
+ init.states.name <- c(init.states.c, init.states.h)
seq.len <- 1
# set up lstm model
model <- setup.rnn.model(rnn.sym=sym,
ctx=ctx,
- num.lstm.layer=num.lstm.layer,
+ num.rnn.layer=num.lstm.layer,
seq.len=seq.len,
num.hidden=num.hidden,
num.embed=num.embed,
num.label=num.label,
batch.size=batch.size,
input.size=input.size,
+ init.states.name=init.states.name,
initializer=mx.init.uniform(0.01),
dropout=dropout)
arg.names <- names(model$rnn.exec$ref.arg.arrays)
@@ -557,11 +337,10 @@ mx.lstm.inference <- function(num.lstm.layer,
#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model.
#'
#' @export
-
mx.lstm.forward <- function(model, input.data, new.seq=FALSE) {
if (new.seq == TRUE) {
init.states <- list()
- for (i in 1:num.lstm.layer) {
+ for (i in 1:model$num.rnn.layer) {
init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.c")]]*0
init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
}
@@ -572,7 +351,7 @@ mx.lstm.forward <- function(model, input.data, new.seq=FALSE) {
mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE)
mx.exec.forward(model$rnn.exec, is.train=FALSE)
init.states <- list()
- for (i in 1:num.lstm.layer) {
+ for (i in 1:model$num.rnn.layer) {
init.states[[paste0("l", i, ".init.c")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.c_output")]]
init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]]
}
diff --git a/R-package/R/mxnet_generated.R b/R-package/R/mxnet_generated.R
index 08393769a368..d8e32ad58ea9 100644
--- a/R-package/R/mxnet_generated.R
+++ b/R-package/R/mxnet_generated.R
@@ -22,6 +22,64 @@ NULL
#' @name mx.nd.argmax.channel
NULL
+#' Broadcast data in the given axis to the given size. The original size of the broadcasting axis must be 1.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.broadcast.axis
+NULL
+
+#' lhs divide rhs with broadcast
+#'
+#' @param lhs NDArray
+#' Left operand to the function
+#' @param rhs NDArray
+#' Right operand to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.broadcast.div
+NULL
+
+#' lhs minus rhs with broadcast
+#'
+#' @param lhs NDArray
+#' Left operand to the function
+#' @param rhs NDArray
+#' Right operand to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.broadcast.minus
+NULL
+
+#' lhs multiple rhs with broadcast
+#'
+#' @param lhs NDArray
+#' Left operand to the function
+#' @param rhs NDArray
+#' Right operand to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.broadcast.mul
+NULL
+
+#' lhs add rhs with broadcast
+#'
+#' @param lhs NDArray
+#' Left operand to the function
+#' @param rhs NDArray
+#' Right operand to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.broadcast.plus
+NULL
+
#' Take ceil value of the src
#'
#' @param src NDArray
@@ -68,6 +126,16 @@ NULL
#' @name mx.nd.cos
NULL
+#' Crop the input matrix and return a new one
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.crop
+NULL
+
#' Calculate dot product of two matrices or two vectors
#'
#' @param lhs NDArray
@@ -90,6 +158,16 @@ NULL
#' @name mx.nd.exp
NULL
+#' Expand the shape of array by inserting a new axis.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.expand.dims
+NULL
+
#' Fill one element of each line(row for python, column for R/Julia) in lhs according to index indicated by rhs and values indicated by mhs. This function assume rhs uses 0-based index.
#'
#' @param lhs NDArray
@@ -104,6 +182,16 @@ NULL
#' @name mx.nd.fill.element.0index
NULL
+#' Flip the input matrix along axis and return a new one
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.flip
+NULL
+
#' Take floor value of the src
#'
#' @param src NDArray
@@ -124,7 +212,7 @@ NULL
#' @name mx.nd.log
NULL
-#' Take max of the src.The result will be ndarray of shape (1,) on the same device.
+#' Take max of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
#'
#' @param src NDArray
#' Source input to the function
@@ -134,7 +222,17 @@ NULL
#' @name mx.nd.max
NULL
-#' Take min of the src.The result will be ndarray of shape (1,) on the same device.
+#' (Deprecated! Use max instead!) Take max of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.max.axis
+NULL
+
+#' Take min of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
#'
#' @param src NDArray
#' Source input to the function
@@ -144,6 +242,16 @@ NULL
#' @name mx.nd.min
NULL
+#' (Deprecated! Use min instead!) Take min of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.min.axis
+NULL
+
#' Take L2 norm of the src.The result will be ndarray of shape (1,) on the same device.
#'
#' @param src NDArray
@@ -194,6 +302,26 @@ NULL
#' @name mx.nd.sin
NULL
+#' Slice the input along certain axis and return a sliced array.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.slice.axis
+NULL
+
+#' Calculate Smooth L1 Loss(lhs, scalar)
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.smooth.l1
+NULL
+
#' Calculate cross_entropy(lhs, one_hot(rhs))
#'
#' @param lhs NDArray
@@ -226,7 +354,7 @@ NULL
#' @name mx.nd.square
NULL
-#' Take sum of the src.The result will be ndarray of shape (1,) on the same device.
+#' Take sum of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
#'
#' @param src NDArray
#' Source input to the function
@@ -236,6 +364,16 @@ NULL
#' @name mx.nd.sum
NULL
+#' (Deprecated! Use sum instead!) Take sum of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
+#'
+#' @param src NDArray
+#' Source input to the function
+#' @return out The result mx.ndarray
+#'
+#' @export
+#' @name mx.nd.sum.axis
+NULL
+
#' Transpose the input matrix and return a new one
#'
#' @param src NDArray
@@ -331,6 +469,8 @@ mx.io.CSVIter <- function(...) {
#' Dataset Param: Shape of each instance generated by the DataIter.
#' @param inter.method int, optional, default='1'
#' Augmentation Param: 0-NN 1-bilinear 2-cubic 3-area 4-lanczos4 9-auto 10-rand.
+#' @param pad int, optional, default='0'
+#' Augmentation Param: Padding size.
#' @param mirror boolean, optional, default=False
#' Augmentation Param: Whether to mirror the image.
#' @param rand.mirror boolean, optional, default=False
@@ -505,6 +645,19 @@ mx.symbol.Crop <- function(...) {
mx.varg.symbol.Crop(list(...))
}
+#' Custom operator implemented in frontend.
+#'
+#' @param op.type string
+#' Type of custom operator. Must be registered first.
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.Custom <- function(...) {
+ mx.varg.symbol.Custom(list(...))
+}
+
#' Apply deconvolution to input then add a bias.
#'
#' @param data Symbol
@@ -518,7 +671,11 @@ mx.symbol.Crop <- function(...) {
#' @param stride Shape(tuple), optional, default=(1,1)
#' deconvolution stride: (y, x)
#' @param pad Shape(tuple), optional, default=(0,0)
-#' pad for deconvolution: (y, x)
+#' pad for deconvolution: (y, x), a good number is : (kernel-1)/2, if target_shape set, pad will be ignored and will be computed automatically
+#' @param adj Shape(tuple), optional, default=(0,0)
+#' adjustment for output shape: (y, x), if target_shape set, adj will be ignored and will be computed automatically
+#' @param target.shape Shape(tuple), optional, default=(0,0)
+#' output shape with target shape : (y, x)
#' @param num.filter int (non-negative), required
#' deconvolution filter(channel) number
#' @param num.group int (non-negative), optional, default=1
@@ -586,7 +743,7 @@ mx.symbol.Embedding <- function(...) {
#' Flatten input
#'
#' @param data Symbol
-#' Input data to flatten.
+#' Input data to flatten.
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -745,10 +902,27 @@ mx.symbol.MAERegressionOutput <- function(...) {
mx.varg.symbol.MAERegressionOutput(list(...))
}
+#' Get output from a symbol and pass 1 gradient back. This is used as a terminal loss if unary and binary operator are used to composite a loss with no declaration of backward dependency
+#'
+#' @param data Symbol
+#' Input data.
+#' @param grad.scale float, optional, default=1
+#' gradient scale as a supplement to unary and binary operators
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.MakeLoss <- function(...) {
+ mx.varg.symbol.MakeLoss(list(...))
+}
+
#' Perform spatial pooling on inputs.
#'
#' @param data Symbol
#' Input data to the pooling operator.
+#' @param global.pool boolean, optional, default=False
+#' Ignore kernel size, do global pooling based on current input feature map. This is useful for input with different shape
#' @param kernel Shape(tuple), required
#' pooling kernel size: (y, x)
#' @param pool.type {'avg', 'max', 'sum'}, required
@@ -766,14 +940,16 @@ mx.symbol.Pooling <- function(...) {
mx.varg.symbol.Pooling(list(...))
}
-#' Resize regions of interest in an input plane to a fixed size by MAX pooling.
+#' Performs region-of-interest pooling on inputs. Resize bounding box coordinates by spatial_scale and crop input feature maps accordingly. The cropped feature maps are pooled by max pooling to a fixed size output indicated by pooled_size. batch_size will change to the number of region bounding boxes after ROIPooling
#'
-#' @param data Symbol[]
-#' [input tensor, regions of interest]
+#' @param data Symbol
+#' Input data to the pooling operator, a 4D Feature maps
+#' @param rois Symbol
+#' Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]]. (x1, y1) and (x2, y2) are top left and down right corners of designated region of interest. batch_index indicates the index of corresponding image in the input data
#' @param pooled.size Shape(tuple), required
-#' target size: (h, w)
+#' fix pooled size: (h, w)
#' @param spatial.scale float, required
-#' Ratio of input plane height (or w) to raw image height (or w).
+#' Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -786,11 +962,13 @@ mx.symbol.ROIPooling <- function(...) {
#' Reshape input to target shape
#'
#' @param data Symbol
-#' Input data to reshape.
-#' @param target.shape Shape(tuple), required
-#' Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims
+#' Input data to reshape.
+#' @param target.shape Shape(tuple), optional, default=(0,0)
+#' (Deprecated! Use shape instead.) Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims
#' @param keep.highest boolean, optional, default=False
-#' Whether keep the highest dim unchanged.If set to yes, than the first dim in target_shape is ignored,and always fixed as input
+#' (Deprecated! Use shape instead.) Whether keep the highest dim unchanged.If set to yes, than the first dim in target_shape is ignored,and always fixed as input
+#' @param shape , optional, default=()
+#' Target new shape. If the dim is same, set it to 0. If the dim is set to be -1, it will be inferred from the rest of dims. One and only one dim can be -1
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -829,6 +1007,8 @@ mx.symbol.SliceChannel <- function(...) {
#' If set to true, for a (n,k,x_1,..,x_n) dimensional input tensor, softmax will generate n*x_1*...*x_n output, each has k classes
#' @param use.ignore boolean, optional, default=False
#' If set to true, the ignore_label value will not contribute to the backward gradient
+#' @param normalization {'batch', 'null', 'valid'},optional, default='null'
+#' If set to null, op will do nothing on output gradient.If set to batch, op will normalize gradient by divide batch sizeIf set to valid, op will normalize gradient by divide sample not ignored
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -858,7 +1038,7 @@ mx.symbol.SoftmaxActivation <- function(...) {
#' @param data Symbol
#' Input data to softmax.
#' @param label Symbol
-#' Label data.
+#' Label data, can also be probability value with same shape as data
#' @param grad.scale float, optional, default=1
#' Scale the gradient by a float factor
#' @param ignore.label float, optional, default=-1
@@ -867,6 +1047,8 @@ mx.symbol.SoftmaxActivation <- function(...) {
#' If set to true, for a (n,k,x_1,..,x_n) dimensional input tensor, softmax will generate n*x_1*...*x_n output, each has k classes
#' @param use.ignore boolean, optional, default=False
#' If set to true, the ignore_label value will not contribute to the backward gradient
+#' @param normalization {'batch', 'null', 'valid'},optional, default='null'
+#' If set to null, op will do nothing on output gradient.If set to batch, op will normalize gradient by divide batch sizeIf set to valid, op will normalize gradient by divide sample not ignored
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -907,6 +1089,8 @@ mx.symbol.SwapAxis <- function(...) {
#' How to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.
#' @param num.args int, required
#' Number of inputs to be upsampled. For nearest neighbor upsampling, this can be 1-N; the size of output will be(scale*h_0,scale*w_0) and all other inputs will be upsampled to thesame size. For bilinear upsampling this must be 2; 1 input and 1 weight.
+#' @param workspace long (non-negative), optional, default=512
+#' Tmp workspace for deconvolution (MB)
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -918,24 +1102,93 @@ mx.symbol.UpSampling <- function(...) {
#' Take absolute value of the src
#'
+#' @param src Symbol
+#' Left symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.abs <- function(...) {
+ mx.varg.symbol.abs(list(...))
+}
+
+#' Broadcast data in the given axis to the given size. The original size of the broadcasting axis must be 1.
+#'
+#' @param src Symbol
+#' Left symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.broadcast_axis <- function(...) {
+ mx.varg.symbol.broadcast_axis(list(...))
+}
+
+#' lhs divide rhs with broadcast
+#'
#' @param lhs Symbol
#' Left symbolic input to the function
#' @param rhs Symbol
+#' Right symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.broadcast_div <- function(...) {
+ mx.varg.symbol.broadcast_div(list(...))
+}
+
+#' lhs minus rhs with broadcast
+#'
+#' @param lhs Symbol
#' Left symbolic input to the function
+#' @param rhs Symbol
+#' Right symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
#'
#' @export
-mx.symbol.abs <- function(...) {
- mx.varg.symbol.abs(list(...))
+mx.symbol.broadcast_minus <- function(...) {
+ mx.varg.symbol.broadcast_minus(list(...))
}
-#' Take ceil value of the src
+#' lhs multiple rhs with broadcast
+#'
+#' @param lhs Symbol
+#' Left symbolic input to the function
+#' @param rhs Symbol
+#' Right symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.broadcast_mul <- function(...) {
+ mx.varg.symbol.broadcast_mul(list(...))
+}
+
+#' lhs add rhs with broadcast
#'
#' @param lhs Symbol
#' Left symbolic input to the function
#' @param rhs Symbol
+#' Right symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.broadcast_plus <- function(...) {
+ mx.varg.symbol.broadcast_plus(list(...))
+}
+
+#' Take ceil value of the src
+#'
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -948,9 +1201,7 @@ mx.symbol.ceil <- function(...) {
#' Take cos of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -966,7 +1217,7 @@ mx.symbol.cos <- function(...) {
#' @param lhs Symbol
#' Left symbolic input to the function
#' @param rhs Symbol
-#' Left symbolic input to the function
+#' Right symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -978,9 +1229,7 @@ mx.symbol.dot <- function(...) {
#' Take exp of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -991,11 +1240,22 @@ mx.symbol.exp <- function(...) {
mx.varg.symbol.exp(list(...))
}
-#' Take floor value of the src
+#' Expand the shape of array by inserting a new axis.
#'
-#' @param lhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.expand_dims <- function(...) {
+ mx.varg.symbol.expand_dims(list(...))
+}
+
+#' Take floor value of the src
+#'
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1008,9 +1268,7 @@ mx.symbol.floor <- function(...) {
#' Take log of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1021,11 +1279,26 @@ mx.symbol.log <- function(...) {
mx.varg.symbol.log(list(...))
}
+#' Sample a normal distribution
+#'
+#' @param loc float, optional, default=0
+#' Mean of the distribution.
+#' @param scale float, optional, default=1
+#' Standard deviation of the distribution.
+#' @param shape Shape(tuple), required
+#' The shape of the output
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.normal <- function(...) {
+ mx.varg.symbol.normal(list(...))
+}
+
#' Take round value of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1038,9 +1311,7 @@ mx.symbol.round <- function(...) {
#' Take rsqrt of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1053,9 +1324,7 @@ mx.symbol.rsqrt <- function(...) {
#' Take sign value of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1068,9 +1337,7 @@ mx.symbol.sign <- function(...) {
#' Take sin of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1081,12 +1348,38 @@ mx.symbol.sin <- function(...) {
mx.varg.symbol.sin(list(...))
}
+#' Slice the input along certain axis and return a sliced array.
+#'
+#' @param src Symbol
+#' Left symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.slice_axis <- function(...) {
+ mx.varg.symbol.slice_axis(list(...))
+}
+
+#' Calculate Smooth L1 Loss(lhs, scalar)
+#'
+#' @param src Symbol
+#' Left symbolic input to the function
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.smooth_l1 <- function(...) {
+ mx.varg.symbol.smooth_l1(list(...))
+}
+
#' Calculate cross_entropy(lhs, one_hot(rhs))
#'
#' @param lhs Symbol
#' Left symbolic input to the function
#' @param rhs Symbol
-#' Left symbolic input to the function
+#' Right symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
#' @return out The result mx.symbol
@@ -1098,9 +1391,7 @@ mx.symbol.softmax_cross_entropy <- function(...) {
#' Take sqrt of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1113,9 +1404,7 @@ mx.symbol.sqrt <- function(...) {
#' Take square of the src
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1126,11 +1415,9 @@ mx.symbol.square <- function(...) {
mx.varg.symbol.square(list(...))
}
-#' Take sum of the src.The result will be ndarray of shape (1,) on the same device.
+#' Take sum of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
#'
-#' @param lhs Symbol
-#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1141,11 +1428,22 @@ mx.symbol.sum <- function(...) {
mx.varg.symbol.sum(list(...))
}
-#' Transpose the input matrix and return a new one
+#' (Deprecated! Use sum instead!) Take sum of the src in the given axis. Params: `axis` and `keepdims`. axis: tuple or integer of axes to reduce, global reduce will be performed if not set. keepdims: the same meaning as Numpy.
#'
-#' @param lhs Symbol
+#' @param src Symbol
#' Left symbolic input to the function
-#' @param rhs Symbol
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.sum_axis <- function(...) {
+ mx.varg.symbol.sum_axis(list(...))
+}
+
+#' Transpose the input matrix and return a new one
+#'
+#' @param src Symbol
#' Left symbolic input to the function
#' @param name string, optional
#' Name of the resulting symbol.
@@ -1155,3 +1453,20 @@ mx.symbol.sum <- function(...) {
mx.symbol.transpose <- function(...) {
mx.varg.symbol.transpose(list(...))
}
+
+#' Sample a uniform distribution
+#'
+#' @param low float, optional, default=0
+#' The lower bound of distribution
+#' @param high float, optional, default=1
+#' The upper bound of distribution
+#' @param shape Shape(tuple), required
+#' The shape of the output
+#' @param name string, optional
+#' Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.uniform <- function(...) {
+ mx.varg.symbol.uniform(list(...))
+}
diff --git a/R-package/R/optimizer.R b/R-package/R/optimizer.R
index 18c4d81aa9ac..36543931d1f6 100644
--- a/R-package/R/optimizer.R
+++ b/R-package/R/optimizer.R
@@ -66,6 +66,89 @@ mx.opt.sgd <- function(learning.rate,
return(list(create.state=create.state, update=update))
}
+#' Create an RMSProp optimizer with respective parameters.
+#' Reference: Tieleman T, Hinton G. Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude[J]. COURSERA: Neural Networks for Machine Learning, 2012, 4(2).
+#' The code follows: http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
+#'
+#' @param learning.rate float, default=0.002
+#' Step size.
+#' @param gamma1 float, default=0.95
+#' decay factor of moving average for gradient, gradient^2.
+#' @param gamma2 float, default=0.9
+#' "momentum" factor.
+#' @param wd float, default=0.0
+#' L2 regularization coefficient add to all the weights.
+#' @param rescale.grad float, default=1.0
+#' rescaling factor of gradient.
+#' @param clip_gradient float, optional
+#' clip gradient in range [-clip_gradient, clip_gradient].
+#' @param lr_scheduler function, optional
+#' The learning rate scheduler.
+#'
+mx.opt.rmsprop <- function(learning.rate=0.002,
+ gamma1=0.95,
+ gamma2=0.9,
+ wd=0,
+ rescale.grad=1,
+ clip_gradient = NULL,
+ lr_scheduler = NULL) {
+  # use lr as short for learning rate.
+ lr <- learning.rate
+ count <- 0
+ num_update <- 0
+
+ rmsprop <- new.env()
+ rmsprop$lr <- lr
+ rmsprop$count <- 0
+ rmsprop$num_update <- 0
+
+ create.state <- function(index, weight) {
+ return (list(n=mx.nd.zeros(dim(weight), ctx(weight)),
+ g=mx.nd.zeros(dim(weight), ctx(weight)),
+ delta=mx.nd.zeros(dim(weight), ctx(weight))))
+ }
+
+ update <- function(index, weight, grad, state) {
+ if (!is.null(lr_scheduler)){
+ lr_scheduler(rmsprop) ## changing lr
+ lr <- rmsprop$lr
+ ## update count
+ indexKey <- paste0('ik', index)
+ if (!exists(envir = rmsprop, x = indexKey)){
+ assign(x = indexKey, value = 0, envir = rmsprop)
+ } else {
+ indexValue <- get(envir = rmsprop, x = indexKey)
+ assign(x = indexKey, value = indexValue + 1, envir = rmsprop)
+ rmsprop$num_update <- max(rmsprop$num_update, get(envir = rmsprop, x = indexKey))
+ }
+ }
+ grad <- grad * rescale.grad
+ if (!is.null(clip_gradient)){
+ if(clip_gradient >= 0){
+ grad_ctx <- ctx(grad)
+ grad <- as.array(grad)
+ grad <- pmax(grad, -1 * clip_gradient)
+ grad <- pmin(grad, clip_gradient)
+ grad <- mx.nd.array(grad, grad_ctx)
+ } else {
+ stop("Error: clip_gradient should be positive number.")
+ }
+ }
+
+ n <- state$n
+ g <- state$g
+ delta <- state$delta
+ n <- gamma1 * n + (1 - gamma1) * (grad * grad)
+ g <- gamma1 * g + (1 - gamma1) * grad
+ delta <- gamma2 * delta - lr * (grad / mx.nd.sqrt(n - g*g + 1e-4) + wd * weight)
+ weight <- weight + delta
+ state <- list(n=n, g=g, delta=delta)
+
+ return(list(weight=weight, state=state))
+ }
+ return(list(create.state=create.state, update=update))
+}
+
#' Create an optimizer by name and parameters
#'
#' @param name The name of the optimizer
@@ -76,6 +159,9 @@ mx.opt.create <- function(name, ...) {
if (name == "sgd") {
return(mx.opt.sgd(...))
}
+ else if (name == "rmsprop") {
+ return (mx.opt.rmsprop(...))
+ }
stop(paste("Unknown optimizer ", name))
}
diff --git a/R-package/R/rnn.R b/R-package/R/rnn.R
new file mode 100644
index 000000000000..b89559a58570
--- /dev/null
+++ b/R-package/R/rnn.R
@@ -0,0 +1,342 @@
+# rnn cell symbol
+rnn <- function(num.hidden, indata, prev.state, param, seqidx,
+ layeridx, dropout=0., batch.norm=FALSE) {
+ if (dropout > 0. )
+ indata <- mx.symbol.Dropout(data=indata, p=dropout)
+ i2h <- mx.symbol.FullyConnected(data=indata,
+ weight=param$i2h.weight,
+ bias=param$i2h.bias,
+ num.hidden=num.hidden,
+ name=paste0("t", seqidx, ".l", layeridx, ".i2h"))
+ h2h <- mx.symbol.FullyConnected(data=prev.state$h,
+ weight=param$h2h.weight,
+ bias=param$h2h.bias,
+ num.hidden=num.hidden,
+ name=paste0("t", seqidx, ".l", layeridx, ".h2h"))
+ hidden <- i2h + h2h
+
+ hidden <- mx.symbol.Activation(data=hidden, act.type="tanh")
+ if (batch.norm)
+ hidden <- mx.symbol.BatchNorm(data=hidden)
+ return (list(h=hidden))
+}
+
+# unrolled rnn network
+rnn.unroll <- function(num.rnn.layer, seq.len, input.size, num.hidden,
+ num.embed, num.label, dropout=0., batch.norm=FALSE) {
+ embed.weight <- mx.symbol.Variable("embed.weight")
+ cls.weight <- mx.symbol.Variable("cls.weight")
+ cls.bias <- mx.symbol.Variable("cls.bias")
+ param.cells <- lapply(1:num.rnn.layer, function(i) {
+ cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")),
+ i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
+ h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
+ h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
+ return (cell)
+ })
+ last.states <- lapply(1:num.rnn.layer, function(i) {
+ state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
+ return (state)
+ })
+
+  # embedding layer
+ label <- mx.symbol.Variable("label")
+ data <- mx.symbol.Variable("data")
+ embed <- mx.symbol.Embedding(data=data, input_dim=input.size,
+ weight=embed.weight, output_dim=num.embed, name="embed")
+ wordvec <- mx.symbol.SliceChannel(data=embed, num_outputs=seq.len, squeeze_axis=1)
+
+ last.hidden <- list()
+ for (seqidx in 1:seq.len) {
+ hidden <- wordvec[[seqidx]]
+ # stack RNN
+ for (i in 1:num.rnn.layer) {
+ dp <- ifelse(i==1, 0, dropout)
+ next.state <- rnn(num.hidden, indata=hidden,
+ prev.state=last.states[[i]],
+ param=param.cells[[i]],
+ seqidx=seqidx, layeridx=i,
+ dropout=dp, batch.norm=batch.norm)
+ hidden <- next.state$h
+ last.states[[i]] <- next.state
+ }
+ # decoder
+ if (dropout > 0.)
+ hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
+ last.hidden <- c(last.hidden, hidden)
+ }
+ last.hidden$dim <- 0
+ last.hidden$num.args <- seq.len
+ concat <-mxnet:::mx.varg.symbol.Concat(last.hidden)
+ fc <- mx.symbol.FullyConnected(data=concat,
+ weight=cls.weight,
+ bias=cls.bias,
+ num.hidden=num.label)
+ label <- mx.symbol.transpose(data=label)
+ label <- mx.symbol.Reshape(data=label, target.shape=c(0))
+
+ loss.all <- mx.symbol.SoftmaxOutput(data=fc, label=label, name="sm")
+ return (loss.all)
+}
+
+# rnn inference model symbol
+rnn.inference.symbol <- function(num.rnn.layer, seq.len, input.size, num.hidden,
+ num.embed, num.label, dropout=0., batch.norm=FALSE) {
+ seqidx <- 0
+ embed.weight <- mx.symbol.Variable("embed.weight")
+ cls.weight <- mx.symbol.Variable("cls.weight")
+ cls.bias <- mx.symbol.Variable("cls.bias")
+ param.cells <- lapply(1:num.rnn.layer, function(i) {
+ cell <- list(i2h.weight = mx.symbol.Variable(paste0("l", i, ".i2h.weight")),
+ i2h.bias = mx.symbol.Variable(paste0("l", i, ".i2h.bias")),
+ h2h.weight = mx.symbol.Variable(paste0("l", i, ".h2h.weight")),
+ h2h.bias = mx.symbol.Variable(paste0("l", i, ".h2h.bias")))
+ return (cell)
+ })
+ last.states <- lapply(1:num.rnn.layer, function(i) {
+ state <- list(h=mx.symbol.Variable(paste0("l", i, ".init.h")))
+ return (state)
+ })
+
+  # embedding layer
+ data <- mx.symbol.Variable("data")
+ hidden <- mx.symbol.Embedding(data=data, input_dim=input.size,
+ weight=embed.weight, output_dim=num.embed, name="embed")
+ # stack RNN
+ for (i in 1:num.rnn.layer) {
+ dp <- ifelse(i==1, 0, dropout)
+ next.state <- rnn(num.hidden, indata=hidden,
+ prev.state=last.states[[i]],
+ param=param.cells[[i]],
+ seqidx=seqidx, layeridx=i,
+ dropout=dp, batch.norm=batch.norm)
+ hidden <- next.state$h
+ last.states[[i]] <- next.state
+ }
+ # decoder
+ if (dropout > 0.)
+ hidden <- mx.symbol.Dropout(data=hidden, p=dropout)
+
+ fc <- mx.symbol.FullyConnected(data=hidden,
+ weight=cls.weight,
+ bias=cls.bias,
+ num_hidden=num.label)
+ sm <- mx.symbol.SoftmaxOutput(data=fc, name='sm')
+ unpack.h <- lapply(1:num.rnn.layer, function(i) {
+ state <- last.states[[i]]
+ state.h <- mx.symbol.BlockGrad(state$h, name=paste0("l", i, ".last.h"))
+ return (state.h)
+ })
+ list.all <- c(sm, unpack.h)
+ return (mx.symbol.Group(list.all))
+}
+
+#' Training RNN Unrolled Model
+#'
+#' @param train.data mx.io.DataIter or list(data=R.array, label=R.array)
+#' The Training set.
+#' @param eval.data mx.io.DataIter or list(data=R.array, label=R.array), optional
+#' The validation set used for validation evaluation during the progress.
+#' @param num.rnn.layer integer
+#' The number of the layer of rnn.
+#' @param seq.len integer
+#' The length of the input sequence.
+#' @param num.hidden integer
+#' The number of hidden nodes.
+#' @param num.embed integer
+#' The output dim of embedding.
+#' @param num.label integer
+#' The number of labels.
+#' @param batch.size integer
+#' The batch size used for R array training.
+#' @param input.size integer
+#' The input dim of one-hot encoding of embedding
+#' @param ctx mx.context, optional
+#' The device used to perform training.
+#' @param num.round integer, default=10
+#' The number of iterations over training data to train the model.
+#' @param update.period integer, default=1
+#' The number of iterations to update parameters during training period.
+#' @param initializer initializer object. default=mx.init.uniform(0.01)
+#' The initialization scheme for parameters.
+#' @param dropout float, default=0
+#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
+#' @param optimizer string, default="sgd"
+#' The optimization method.
+#' @param batch.norm boolean, default=FALSE
+#' Whether to use batch normalization.
+#' @param ... other parameters passing to \code{mx.rnn}/.
+#' @return model A trained rnn unrolled model.
+#'
+#' @export
+mx.rnn <- function( train.data, eval.data=NULL,
+ num.rnn.layer, seq.len,
+ num.hidden, num.embed, num.label,
+ batch.size, input.size,
+ ctx=mx.ctx.default(),
+ num.round=10, update.period=1,
+ initializer=mx.init.uniform(0.01),
+ dropout=0, optimizer='sgd',
+ batch.norm=FALSE,
+ ...) {
+ # check data and change data into iterator
+ train.data <- check.data(train.data, batch.size, TRUE)
+ eval.data <- check.data(eval.data, batch.size, FALSE)
+
+ # get unrolled rnn symbol
+ rnn.sym <- rnn.unroll( num.rnn.layer=num.rnn.layer,
+ num.hidden=num.hidden,
+ seq.len=seq.len,
+ input.size=input.size,
+ num.embed=num.embed,
+ num.label=num.label,
+ dropout=dropout,
+ batch.norm=batch.norm)
+ init.states.name <- lapply(1:num.rnn.layer, function(i) {
+ state <- paste0("l", i, ".init.h")
+ return (state)
+ })
+ # set up rnn model
+ model <- setup.rnn.model(rnn.sym=rnn.sym,
+ ctx=ctx,
+ num.rnn.layer=num.rnn.layer,
+ seq.len=seq.len,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ batch.size=batch.size,
+ input.size=input.size,
+ init.states.name=init.states.name,
+ initializer=initializer,
+ dropout=dropout)
+ # train rnn model
+ model <- train.rnn( model, train.data, eval.data,
+ num.round=num.round,
+ update.period=update.period,
+ ctx=ctx,
+ init.states.name=init.states.name,
+ ...)
+ # change model into MXFeedForwardModel
+ model <- list(symbol=model$symbol, arg.params=model$rnn.exec$ref.arg.arrays, aux.params=model$rnn.exec$ref.aux.arrays)
+ return(structure(model, class="MXFeedForwardModel"))
+}
+
+#' Create a RNN Inference Model
+#'
+#' @param num.rnn.layer integer
+#' The number of the layer of rnn.
+#' @param input.size integer
+#' The input dim of one-hot encoding of embedding
+#' @param num.hidden integer
+#' The number of hidden nodes.
+#' @param num.embed integer
+#' The output dim of embedding.
+#' @param num.label integer
+#' The number of labels.
+#' @param batch.size integer, default=1
+#' The batch size used for R array training.
+#' @param arg.params list
+#' Model parameter, list of name to NDArray of net's weights.
+#' @param ctx mx.context, optional
+#' The device used to perform training.
+#' @param dropout float, default=0
+#' A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.
+#' @param batch.norm boolean, default=FALSE
+#' Whether to use batch normalization.
+#' @return model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+#' A rnn inference model.
+#'
+#' @export
+mx.rnn.inference <- function( num.rnn.layer,
+ input.size,
+ num.hidden,
+ num.embed,
+ num.label,
+ batch.size=1,
+ arg.params,
+ ctx=mx.cpu(),
+ dropout=0.,
+ batch.norm=FALSE) {
+ sym <- rnn.inference.symbol( num.rnn.layer=num.rnn.layer,
+ input.size=input.size,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ dropout=dropout,
+ batch.norm=batch.norm)
+ # init.states.name <- c()
+ # for (i in 1:num.rnn.layer) {
+ # init.states.name <- c(init.states.name, paste0("l", i, ".init.c"))
+ # init.states.name <- c(init.states.name, paste0("l", i, ".init.h"))
+ # }
+ init.states.name <- lapply(1:num.rnn.layer, function(i) {
+ state <- paste0("l", i, ".init.h")
+ return (state)
+ })
+
+ seq.len <- 1
+ # set up rnn model
+ model <- setup.rnn.model(rnn.sym=sym,
+ ctx=ctx,
+ num.rnn.layer=num.rnn.layer,
+ seq.len=seq.len,
+ num.hidden=num.hidden,
+ num.embed=num.embed,
+ num.label=num.label,
+ batch.size=batch.size,
+ input.size=input.size,
+ init.states.name=init.states.name,
+ initializer=mx.init.uniform(0.01),
+ dropout=dropout)
+ arg.names <- names(model$rnn.exec$ref.arg.arrays)
+ for (k in names(arg.params)) {
+ if ((k %in% arg.names) && is.param.name(k) ) {
+ rnn.input <- list()
+ rnn.input[[k]] <- arg.params[[k]]
+ mx.exec.update.arg.arrays(model$rnn.exec, rnn.input, match.name=TRUE)
+ }
+ }
+ init.states <- list()
+ for (i in 1:num.rnn.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+
+ return (model)
+}
+
+#' Using forward function to predict in rnn inference model
+#'
+#' @param model rnn model
+#' A rnn inference model
+#' @param input.data array.matrix
+#' The input data for forward function
+#' @param new.seq boolean, default=FALSE
+#' Whether the input is the start of a new sequence
+#'
+#' @return result A list(prob=prob, model=model) containing the result probability of each label and the model.
+#'
+#' @export
+mx.rnn.forward <- function(model, input.data, new.seq=FALSE) {
+ if (new.seq == TRUE) {
+ init.states <- list()
+ for (i in 1:model$num.rnn.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.arg.arrays[[paste0("l", i, ".init.h")]]*0
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+ }
+ dim(input.data) <- c(model$batch.size)
+ data <- list(data=mx.nd.array(input.data))
+ mx.exec.update.arg.arrays(model$rnn.exec, data, match.name=TRUE)
+ mx.exec.forward(model$rnn.exec, is.train=FALSE)
+ init.states <- list()
+ for (i in 1:model$num.rnn.layer) {
+ init.states[[paste0("l", i, ".init.h")]] <- model$rnn.exec$ref.outputs[[paste0("l", i, ".last.h_output")]]
+ }
+ mx.exec.update.arg.arrays(model$rnn.exec, init.states, match.name=TRUE)
+ #print (model$rnn.exec$ref)
+ prob <- model$rnn.exec$ref.outputs[["sm_output"]]
+  # print ("prob")  # debug output disabled: library code should not print
+  # print (prob)
+ return (list(prob=prob, model=model))
+}
diff --git a/R-package/R/rnn_model.R b/R-package/R/rnn_model.R
new file mode 100644
index 000000000000..19f53b0f6407
--- /dev/null
+++ b/R-package/R/rnn_model.R
@@ -0,0 +1,244 @@
+is.param.name <- function(name) {
+ return (grepl('weight$', name) || grepl('bias$', name) ||
+ grepl('gamma$', name) || grepl('beta$', name) )
+}
+
+# Initialize parameters
+mx.model.init.params.rnn <- function(symbol, input.shape, initializer, ctx) {
+ if (!is.mx.symbol(symbol)) stop("symbol need to be MXSymbol")
+ slist <- symbol$infer.shape(input.shape)
+ if (is.null(slist)) stop("Not enough information to get shapes")
+ arg.params <- mx.init.create(initializer, slist$arg.shapes, ctx, skip.unknown=TRUE)
+ aux.params <- mx.init.create(initializer, slist$aux.shapes, ctx, skip.unknown=FALSE)
+ return(list(arg.params=arg.params, aux.params=aux.params))
+}
+
+# Initialize the data iter
+mx.model.init.iter.rnn <- function(X, y, batch.size, is.train) {
+ if (is.MXDataIter(X)) return(X)
+  shape <- dim(X)
+ if (is.null(shape)) {
+ num.data <- length(X)
+ } else {
+ ndim <- length(shape)
+ num.data <- shape[[ndim]]
+ }
+ if (is.null(y)) {
+ if (is.train) stop("Need to provide parameter y for training with R arrays.")
+ y <- c(1:num.data) * 0
+ }
+
+ batch.size <- min(num.data, batch.size)
+
+ return(mx.io.arrayiter(X, y, batch.size=batch.size, shuffle=is.train))
+}
+
+# set up rnn model with rnn cells
+setup.rnn.model <- function(rnn.sym, ctx,
+ num.rnn.layer, seq.len,
+ num.hidden, num.embed, num.label,
+ batch.size, input.size,
+ init.states.name,
+ initializer=mx.init.uniform(0.01),
+ dropout=0) {
+
+ arg.names <- rnn.sym$arguments
+ input.shapes <- list()
+ for (name in arg.names) {
+ if (name %in% init.states.name) {
+ input.shapes[[name]] <- c(num.hidden, batch.size)
+ }
+ else if (grepl('data$', name) || grepl('label$', name) ) {
+ if (seq.len == 1) {
+ input.shapes[[name]] <- c(batch.size)
+ } else {
+ input.shapes[[name]] <- c(seq.len, batch.size)
+ }
+ }
+ }
+ params <- mx.model.init.params.rnn(rnn.sym, input.shapes, initializer, mx.cpu())
+ args <- input.shapes
+ args$symbol <- rnn.sym
+ args$ctx <- ctx
+ args$grad.req <- "add"
+ rnn.exec <- do.call(mx.simple.bind, args)
+
+ mx.exec.update.arg.arrays(rnn.exec, params$arg.params, match.name=TRUE)
+ mx.exec.update.aux.arrays(rnn.exec, params$aux.params, match.name=TRUE)
+
+ grad.arrays <- list()
+ for (name in names(rnn.exec$ref.grad.arrays)) {
+ if (is.param.name(name))
+ grad.arrays[[name]] <- rnn.exec$ref.arg.arrays[[name]]*0
+ }
+ mx.exec.update.grad.arrays(rnn.exec, grad.arrays, match.name=TRUE)
+
+ return (list(rnn.exec=rnn.exec, symbol=rnn.sym,
+ num.rnn.layer=num.rnn.layer, num.hidden=num.hidden,
+ seq.len=seq.len, batch.size=batch.size,
+ num.embed=num.embed))
+
+}
+
+
+calc.nll <- function(seq.label.probs, batch.size) {
+ nll = - sum(log(seq.label.probs)) / batch.size
+ return (nll)
+}
+
+get.label <- function(label, ctx) {
+ label <- as.array(label)
+ seq.len <- dim(label)[[1]]
+ batch.size <- dim(label)[[2]]
+ sm.label <- array(0, dim=c(seq.len*batch.size))
+ for (seqidx in 1:seq.len) {
+ sm.label[((seqidx-1)*batch.size+1) : (seqidx*batch.size)] <- label[seqidx,]
+ }
+ return (mx.nd.array(sm.label, ctx))
+}
+
+
+# training rnn model
+train.rnn <- function (model, train.data, eval.data,
+ num.round, update.period,
+ init.states.name,
+ optimizer='sgd', ctx=mx.ctx.default(), ...) {
+ m <- model
+ seq.len <- m$seq.len
+ batch.size <- m$batch.size
+ num.rnn.layer <- m$num.rnn.layer
+ num.hidden <- m$num.hidden
+
+ opt <- mx.opt.create(optimizer, rescale.grad=(1/batch.size), ...)
+
+ updater <- mx.opt.get.updater(opt, m$rnn.exec$ref.arg.arrays)
+ epoch.counter <- 0
+ log.period <- max(as.integer(1000 / seq.len), 1)
+ last.perp <- 10000000.0
+
+ for (iteration in 1:num.round) {
+ nbatch <- 0
+ train.nll <- 0
+ # reset states
+ init.states <- list()
+ for (name in init.states.name) {
+ init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0
+ }
+
+ mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
+
+ tic <- Sys.time()
+
+ train.data$reset()
+
+ while (train.data$iter.next()) {
+ # set rnn input
+ rnn.input <- train.data$value()
+ mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE)
+
+ mx.exec.forward(m$rnn.exec, is.train=TRUE)
+ seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx))
+
+ mx.exec.backward(m$rnn.exec)
+ init.states <- list()
+ for (name in init.states.name) {
+ init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0
+ }
+
+ mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
+ # update epoch counter
+ epoch.counter <- epoch.counter + 1
+ if (epoch.counter %% update.period == 0) {
+        # the gradient of the initial h state should be zero
+ init.grad <- list()
+ for (name in init.states.name) {
+ init.grad[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0
+ }
+
+ mx.exec.update.grad.arrays(m$rnn.exec, init.grad, match.name=TRUE)
+
+ arg.blocks <- updater(m$rnn.exec$ref.arg.arrays, m$rnn.exec$ref.grad.arrays)
+
+ mx.exec.update.arg.arrays(m$rnn.exec, arg.blocks, skip.null=TRUE)
+
+ grad.arrays <- list()
+ for (name in names(m$rnn.exec$ref.grad.arrays)) {
+ if (is.param.name(name))
+ grad.arrays[[name]] <- m$rnn.exec$ref.grad.arrays[[name]]*0
+ }
+ mx.exec.update.grad.arrays(m$rnn.exec, grad.arrays, match.name=TRUE)
+
+ }
+
+ train.nll <- train.nll + calc.nll(as.array(seq.label.probs), batch.size)
+
+ nbatch <- nbatch + seq.len
+ if ((epoch.counter %% log.period) == 0) {
+ cat(paste0("Epoch [", epoch.counter,
+ "] Train: NLL=", train.nll / nbatch,
+ ", Perp=", exp(train.nll / nbatch), "\n"))
+ }
+ }
+ train.data$reset()
+ # end of training loop
+ toc <- Sys.time()
+ cat(paste0("Iter [", iteration,
+ "] Train: Time: ", as.numeric(toc - tic, units="secs"),
+ " sec, NLL=", train.nll / nbatch,
+ ", Perp=", exp(train.nll / nbatch), "\n"))
+
+ if (!is.null(eval.data)) {
+ val.nll <- 0.0
+ # validation set, reset states
+ init.states <- list()
+ for (name in init.states.name) {
+ init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0
+ }
+ mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
+
+ eval.data$reset()
+ nbatch <- 0
+ while (eval.data$iter.next()) {
+ # set rnn input
+ rnn.input <- eval.data$value()
+ mx.exec.update.arg.arrays(m$rnn.exec, rnn.input, match.name=TRUE)
+ mx.exec.forward(m$rnn.exec, is.train=FALSE)
+ # probability of each label class, used to evaluate nll
+ seq.label.probs <- mx.nd.choose.element.0index(m$rnn.exec$ref.outputs[["sm_output"]], get.label(m$rnn.exec$ref.arg.arrays[["label"]], ctx))
+ # transfer the states
+ init.states <- list()
+ for (name in init.states.name) {
+ init.states[[name]] <- m$rnn.exec$ref.arg.arrays[[name]]*0
+ }
+ mx.exec.update.arg.arrays(m$rnn.exec, init.states, match.name=TRUE)
+ val.nll <- val.nll + calc.nll(as.array(seq.label.probs), batch.size)
+ nbatch <- nbatch + seq.len
+ }
+ eval.data$reset()
+ perp <- exp(val.nll / nbatch)
+ cat(paste0("Iter [", iteration,
+ "] Val: NLL=", val.nll / nbatch,
+ ", Perp=", exp(val.nll / nbatch), "\n"))
+ }
+ }
+
+ return (m)
+}
+
+# check data and translate data into iterator if data is array/matrix
+check.data <- function(data, batch.size, is.train) {
+ if (!is.null(data) && !is.list(data) && !is.mx.dataiter(data)) {
+ stop("The dataset should be either a mx.io.DataIter or a R list")
+ }
+ if (is.list(data)) {
+ if (is.null(data$data) || is.null(data$label)){
+ stop("Please provide dataset as list(data=R.array, label=R.array)")
+ }
+ data <- mx.model.init.iter.rnn(data$data, data$label, batch.size=batch.size, is.train = is.train)
+ }
+ if (!is.null(data) && !data$iter.next()) {
+ data$reset()
+ if (!data$iter.next()) stop("Empty input")
+ }
+ return (data)
+}
\ No newline at end of file
diff --git a/R-package/man/mx.gru.Rd b/R-package/man/mx.gru.Rd
new file mode 100644
index 000000000000..deca62cfa85a
--- /dev/null
+++ b/R-package/man/mx.gru.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gru.R
+\name{mx.gru}
+\alias{mx.gru}
+\title{Training GRU Unrolled Model}
+\usage{
+mx.gru(train.data, eval.data = NULL, num.gru.layer, seq.len, num.hidden,
+ num.embed, num.label, batch.size, input.size, ctx = mx.ctx.default(),
+ num.round = 10, update.period = 1, initializer = mx.init.uniform(0.01),
+ dropout = 0, optimizer = "sgd", ...)
+}
+\arguments{
+\item{train.data}{mx.io.DataIter or list(data=R.array, label=R.array)
+The Training set.}
+
+\item{eval.data}{mx.io.DataIter or list(data=R.array, label=R.array), optional
+The validation set used for validation evaluation during the progress.}
+
+\item{num.gru.layer}{integer
+The number of the layer of gru.}
+
+\item{seq.len}{integer
+The length of the input sequence.}
+
+\item{num.hidden}{integer
+The number of hidden nodes.}
+
+\item{num.embed}{integer
+The output dim of embedding.}
+
+\item{num.label}{integer
+The number of labels.}
+
+\item{batch.size}{integer
+The batch size used for R array training.}
+
+\item{input.size}{integer
+The input dim of one-hot encoding of embedding}
+
+\item{ctx}{mx.context, optional
+The device used to perform training.}
+
+\item{num.round}{integer, default=10
+The number of iterations over training data to train the model.}
+
+\item{update.period}{integer, default=1
+The number of iterations to update parameters during training period.}
+
+\item{initializer}{initializer object. default=mx.init.uniform(0.01)
+The initialization scheme for parameters.}
+
+\item{dropout}{float, default=0
+A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.}
+
+\item{optimizer}{string, default="sgd"
+The optimization method.}
+
+\item{...}{other parameters passing to \code{mx.gru}/.}
+}
+\value{
+model A trained gru unrolled model.
+}
+\description{
+Training GRU Unrolled Model
+}
+
diff --git a/R-package/man/mx.gru.forward.Rd b/R-package/man/mx.gru.forward.Rd
new file mode 100644
index 000000000000..cedc27bd85a4
--- /dev/null
+++ b/R-package/man/mx.gru.forward.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gru.R
+\name{mx.gru.forward}
+\alias{mx.gru.forward}
+\title{Using forward function to predict in gru inference model}
+\usage{
+mx.gru.forward(model, input.data, new.seq = FALSE)
+}
+\arguments{
+\item{model}{gru model
+A gru inference model}
+
+\item{input.data}{array.matrix
+The input data for forward function}
+
+\item{new.seq}{boolean, default=FALSE
+Whether the input is the start of a new sequence}
+}
+\value{
+result A list(prob=prob, model=model) containing the result probability of each label and the model.
+}
+\description{
+Using forward function to predict in gru inference model
+}
+
diff --git a/R-package/man/mx.gru.inference.Rd b/R-package/man/mx.gru.inference.Rd
new file mode 100644
index 000000000000..85c66ed8a781
--- /dev/null
+++ b/R-package/man/mx.gru.inference.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/gru.R
+\name{mx.gru.inference}
+\alias{mx.gru.inference}
+\title{Create a GRU Inference Model}
+\usage{
+mx.gru.inference(num.gru.layer, input.size, num.hidden, num.embed, num.label,
+ batch.size = 1, arg.params, ctx = mx.cpu(), dropout = 0)
+}
+\arguments{
+\item{num.gru.layer}{integer
+The number of the layer of gru.}
+
+\item{input.size}{integer
+The input dim of one-hot encoding of embedding}
+
+\item{num.hidden}{integer
+The number of hidden nodes.}
+
+\item{num.embed}{integer
+The output dim of embedding.}
+
+\item{num.label}{integer
+The number of labels.}
+
+\item{batch.size}{integer, default=1
+The batch size used for R array training.}
+
+\item{arg.params}{list
+Model parameter, list of name to NDArray of net's weights.}
+
+\item{ctx}{mx.context, optional
+The device used to perform training.}
+
+\item{dropout}{float, default=0
+A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.}
+}
+\value{
+model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+ A gru inference model.
+}
+\description{
+Create a GRU Inference Model
+}
+
diff --git a/R-package/man/mx.lstm.inference.Rd b/R-package/man/mx.lstm.inference.Rd
index af572ee28590..19fe3b7fa368 100644
--- a/R-package/man/mx.lstm.inference.Rd
+++ b/R-package/man/mx.lstm.inference.Rd
@@ -23,7 +23,7 @@ The output dim of embedding.}
\item{num.label}{integer
The number of labels.}
-\item{batch.size}{integer
+\item{batch.size}{integer, default=1
The batch size used for R array training.}
\item{arg.params}{list
@@ -36,7 +36,8 @@ Model parameter, list of name to NDArray of net's weights.}
A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.}
}
\value{
-model a lstm inference model.
+model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+ A lstm inference model.
}
\description{
Create a LSTM Inference Model
diff --git a/R-package/man/mx.opt.rmsprop.Rd b/R-package/man/mx.opt.rmsprop.Rd
new file mode 100644
index 000000000000..d51447200f2e
--- /dev/null
+++ b/R-package/man/mx.opt.rmsprop.Rd
@@ -0,0 +1,39 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/optimizer.R
+\name{mx.opt.rmsprop}
+\alias{mx.opt.rmsprop}
+\title{Create an RMSProp optimizer with respective parameters.
+Reference: Tieleman T, Hinton G. Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude[J]. COURSERA: Neural Networks for Machine Learning, 2012, 4(2).
+The code follows: http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.}
+\usage{
+mx.opt.rmsprop(learning.rate = 0.002, gamma1 = 0.95, gamma2 = 0.9,
+ wd = 0, rescale.grad = 1, clip_gradient = NULL, lr_scheduler = NULL)
+}
+\arguments{
+\item{learning.rate}{float, default=0.002
+Step size.}
+
+\item{gamma1}{float, default=0.95
+decay factor of moving average for gradient, gradient^2.}
+
+\item{wd}{float, default=0.0
+L2 regularization coefficient add to all the weights.}
+
+\item{rescale.grad}{float, default=1.0
+rescaling factor of gradient.}
+
+\item{clip_gradient}{float, optional
+clip gradient in range [-clip_gradient, clip_gradient].}
+
+\item{lr_scheduler}{function, optional
+The learning rate scheduler.}
+
+\item{gamma2}{float, default=0.9
+"momentum" factor.}
+}
+\description{
+Create an RMSProp optimizer with respective parameters.
+Reference: Tieleman T, Hinton G. Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude[J]. COURSERA: Neural Networks for Machine Learning, 2012, 4(2).
+The code follows: http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.
+}
+
diff --git a/R-package/man/mx.rnn.Rd b/R-package/man/mx.rnn.Rd
new file mode 100644
index 000000000000..c40915c98275
--- /dev/null
+++ b/R-package/man/mx.rnn.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rnn.R
+\name{mx.rnn}
+\alias{mx.rnn}
+\title{Training RNN Unrolled Model}
+\usage{
+mx.rnn(train.data, eval.data = NULL, num.rnn.layer, seq.len, num.hidden,
+ num.embed, num.label, batch.size, input.size, ctx = mx.ctx.default(),
+ num.round = 10, update.period = 1, initializer = mx.init.uniform(0.01),
+ dropout = 0, optimizer = "sgd", batch.norm = FALSE, ...)
+}
+\arguments{
+\item{train.data}{mx.io.DataIter or list(data=R.array, label=R.array)
+The Training set.}
+
+\item{eval.data}{mx.io.DataIter or list(data=R.array, label=R.array), optional
+The validation set used for validation evaluation during the progress.}
+
+\item{num.rnn.layer}{integer
+The number of the layer of rnn.}
+
+\item{seq.len}{integer
+The length of the input sequence.}
+
+\item{num.hidden}{integer
+The number of hidden nodes.}
+
+\item{num.embed}{integer
+The output dim of embedding.}
+
+\item{num.label}{integer
+The number of labels.}
+
+\item{batch.size}{integer
+The batch size used for R array training.}
+
+\item{input.size}{integer
+The input dim of one-hot encoding of embedding}
+
+\item{ctx}{mx.context, optional
+The device used to perform training.}
+
+\item{num.round}{integer, default=10
+The number of iterations over training data to train the model.}
+
+\item{update.period}{integer, default=1
+The number of iterations to update parameters during training period.}
+
+\item{initializer}{initializer object. default=mx.init.uniform(0.01)
+The initialization scheme for parameters.}
+
+\item{dropout}{float, default=0
+A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.}
+
+\item{optimizer}{string, default="sgd"
+The optimization method.}
+
+\item{batch.norm}{boolean, default=FALSE
+Whether to use batch normalization.}
+
+\item{...}{other parameters passing to \code{mx.rnn}/.}
+}
+\value{
+model A trained rnn unrolled model.
+}
+\description{
+Training RNN Unrolled Model
+}
+
diff --git a/R-package/man/mx.rnn.forward.Rd b/R-package/man/mx.rnn.forward.Rd
new file mode 100644
index 000000000000..c8763b6c1ad7
--- /dev/null
+++ b/R-package/man/mx.rnn.forward.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rnn.R
+\name{mx.rnn.forward}
+\alias{mx.rnn.forward}
+\title{Using forward function to predict in rnn inference model}
+\usage{
+mx.rnn.forward(model, input.data, new.seq = FALSE)
+}
+\arguments{
+\item{model}{rnn model
+A rnn inference model}
+
+\item{input.data}{array.matrix
+The input data for forward function}
+
+\item{new.seq}{boolean, default=FALSE
+Whether the input is the start of a new sequence}
+}
+\value{
+result A list(prob=prob, model=model) containing the result probability of each label and the model.
+}
+\description{
+Using forward function to predict in rnn inference model
+}
+
diff --git a/R-package/man/mx.rnn.inference.Rd b/R-package/man/mx.rnn.inference.Rd
new file mode 100644
index 000000000000..56e00e62f620
--- /dev/null
+++ b/R-package/man/mx.rnn.inference.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rnn.R
+\name{mx.rnn.inference}
+\alias{mx.rnn.inference}
+\title{Create a RNN Inference Model}
+\usage{
+mx.rnn.inference(num.rnn.layer, input.size, num.hidden, num.embed, num.label,
+ batch.size = 1, arg.params, ctx = mx.cpu(), dropout = 0,
+ batch.norm = FALSE)
+}
+\arguments{
+\item{num.rnn.layer}{integer
+The number of the layer of rnn.}
+
+\item{input.size}{integer
+The input dim of one-hot encoding of embedding}
+
+\item{num.hidden}{integer
+The number of hidden nodes.}
+
+\item{num.embed}{integer
+The output dim of embedding.}
+
+\item{num.label}{integer
+The number of labels.}
+
+\item{batch.size}{integer, default=1
+The batch size used for R array training.}
+
+\item{arg.params}{list
+Model parameter, list of name to NDArray of net's weights.}
+
+\item{ctx}{mx.context, optional
+The device used to perform training.}
+
+\item{dropout}{float, default=0
+A number in [0,1) containing the dropout ratio from the last hidden layer to the output layer.}
+
+\item{batch.norm}{boolean, default=FALSE
+Whether to use batch normalization.}
+}
+\value{
+model list(rnn.exec=integer, symbol=mxnet symbol, num.rnn.layer=integer, num.hidden=integer, seq.len=integer, batch.size=integer, num.embed=integer)
+ A rnn inference model.
+}
+\description{
+Create a RNN Inference Model
+}
+
diff --git a/R-package/vignettes/CallbackFunctionTutorial.Rmd b/R-package/vignettes/CallbackFunctionTutorial.Rmd
index 85cd78be90b0..97b6ce3161a0 100644
--- a/R-package/vignettes/CallbackFunctionTutorial.Rmd
+++ b/R-package/vignettes/CallbackFunctionTutorial.Rmd
@@ -6,7 +6,7 @@ which can very useful in model training.
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/CallbackFunctionTutorial.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/CallbackFunctionTutorial.html)
- You can find the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/CallbackFunctionTutorial.Rmd)
diff --git a/R-package/vignettes/CharRnnModel.Rmd b/R-package/vignettes/CharRnnModel.Rmd
index 9066d60f7513..2cb4b00ec1ac 100644
--- a/R-package/vignettes/CharRnnModel.Rmd
+++ b/R-package/vignettes/CharRnnModel.Rmd
@@ -3,12 +3,12 @@ Char RNN Example
This example aims to show how to use lstm model to build a char level language model, and generate text from it. We use a tiny shakespeare text for demo purpose.
-Data can be found at https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare.
+Data can be found [here](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare).
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/package/r/CharRnnModel.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/CharRnnModel.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/CharRnnModel.Rmd)
Load Data
@@ -21,10 +21,10 @@ Set basic network parameters.
```{r}
batch.size = 32
seq.len = 32
-num.hidden = 256
-num.embed = 256
-num.lstm.layer = 2
-num.round = 3
+num.hidden = 16
+num.embed = 16
+num.lstm.layer = 1
+num.round = 1
learning.rate= 0.1
wd=0.00001
clip_gradient=1
@@ -161,33 +161,7 @@ model <- mx.lstm(X.train, X.val,
clip_gradient=clip_gradient)
```
-Setting the parameters ctx=mx.gpu(0) and num.round=5 can get the following result.
-```
-Epoch [31] Train: NLL=3.47213018872144, Perp=32.2052727363657
-...
-Epoch [961] Train: NLL=2.32060007657895, Perp=10.181782322355
-Iter [1] Train: Time: 186.397065639496 sec, NLL=2.31135356537961, Perp=10.0880702804858
-Iter [1] Val: NLL=1.94184484060012, Perp=6.97160060607419
-Epoch [992] Train: NLL=1.84784553299322, Perp=6.34613225095329
-...
-Epoch [1953] Train: NLL=1.70175791172558, Perp=5.48357857093351
-Iter [2] Train: Time: 188.929051160812 sec, NLL=1.70103940328978, Perp=5.47963998859367
-Iter [2] Val: NLL=1.74979316010449, Perp=5.75341251767988
-...
-Epoch [2914] Train: NLL=1.54738185300295, Perp=4.69915099483974
-Iter [3] Train: Time: 185.425321578979 sec, NLL=1.54604189517013, Perp=4.69285854740519
-Iter [3] Val: NLL=1.67780240235925, Perp=5.35377758479576
-Epoch [2945] Train: NLL=1.48868466087876, Perp=4.43126307034767
-...
-Iter [4] Train: Time: 185.487086296082 sec, NLL=1.4744973925858, Perp=4.36883940994296
-Iter [4] Val: NLL=1.64488167325603, Perp=5.18039689118454
-Epoch [3937] Train: NLL=1.46355541021581, Perp=4.32129622881604
-...
-Epoch [4898] Train: NLL=1.42900458455642, Perp=4.17454171976281
-Iter [5] Train: Time: 185.070136785507 sec, NLL=1.42909226256273, Perp=4.17490775130428
-Iter [5] Val: NLL=1.62716655804022, Perp=5.08943365437187
-```
Inference from model
--------------------
helper function for random sample.
@@ -225,15 +199,12 @@ choice <- function(weights) {
```
we can use random output or fixed output by choosing largest probability.
```{r}
-make.output <- function(prob, sample=FALSE, temperature=1.) {
+make.output <- function(prob, sample=FALSE) {
if (!sample) {
idx <- which.max(as.array(prob))
}
else {
- scale_prob <- mx.nd.clip(prob, 1e-6, 1 - 1e-6)
- rescale <- mx.nd.exp(mx.nd.log(scale_prob) / temperature)
- rescale <- rescale / (as.array(mx.nd.sum(rescale))[1])
- idx <- choice(rescale)
+ idx <- choice(prob)
}
return (idx)
@@ -252,7 +223,7 @@ infer.model <- mx.lstm.inference(num.lstm.layer=num.lstm.layer,
ctx=mx.cpu())
```
generate a sequence of 75 chars using function `mx.lstm.forward`.
-```
+```{r}
start <- 'a'
seq.len <- 75
random.sample <- TRUE
@@ -273,4 +244,10 @@ The result:
```
ah not a drobl greens
Settled asing lately sistering sounted to their hight
-```
\ No newline at end of file
+```
+
+Other RNN models
+----------------
+In `mxnet`, other RNN models such as custom RNN and GRU are also provided.
+- For **custom RNN model**, you can replace `mx.lstm` with `mx.rnn` to train rnn model. Also, you can replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.rnn.inference` and `mx.rnn.forward` to inference from rnn model and get forward result from the inference model.
+- For **GRU model**, you can replace `mx.lstm` with `mx.gru` to train gru model. Also, you can replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.gru.inference` and `mx.gru.forward` to inference from gru model and get forward result from the inference model.
\ No newline at end of file
diff --git a/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd b/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd
index 32fdafd38145..6b58946eaa31 100644
--- a/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd
+++ b/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd
@@ -7,13 +7,13 @@ algorithm can do is to classify real world images.
In this example we will show how to use a pretrained Inception-BatchNorm Network to predict the class of
real world image. The network architecture is decribed in [1].
-The pre-trained Inception-BatchNorm network is able to be downloaded from [this link](http://webdocs.cs.ualberta.ca/~bx3/data/Inception.zip)
+The pre-trained Inception-BatchNorm network is able to be downloaded from [this link](http://data.dmlc.ml/mxnet/data/Inception.zip)
This model gives the recent state-of-art prediction accuracy on image net dataset.
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/classifyRealImageWithPretrainedModel.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/classifyRealImageWithPretrainedModel.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd)
Pacakge Loading
@@ -69,7 +69,7 @@ preproc.image <- function(im, mean.image) {
shape <- dim(im)
short.edge <- min(shape[1:2])
xx <- floor((shape[1] - short.edge) / 2)
- yy <- floor((shape[2] - short.edge) / 2)
+ yy <- floor((shape[2] - short.edge) / 2)
croped <- crop.borders(im, xx, yy)
# resize to 224 x 224, needed by input of the model.
resized <- resize(croped, 224, 224)
diff --git a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
index efb0dba98109..66ac18ef3806 100644
--- a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
+++ b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd
@@ -8,7 +8,7 @@ We will show you how to do classification and regression tasks respectively. The
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/fiveMinutesNeuralNetwork.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/fiveMinutesNeuralNetwork.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd)
## Classification
diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd
index a81613b4a59e..6387b4ba1694 100644
--- a/R-package/vignettes/mnistCompetition.Rmd
+++ b/R-package/vignettes/mnistCompetition.Rmd
@@ -5,7 +5,7 @@ Handwritten Digits Classification Competition
We will present the basic usage of [mxnet](https://github.com/dmlc/mxnet/tree/master/R-package) to compete in this challenge.
This tutorial is written in Rmarkdown. You can download the source [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/mnistCompetition.Rmd) and view a
-hosted version of tutorial [here](http://mxnet.readthedocs.org/en/latest/R-package/mnistCompetition.html).
+hosted version of tutorial [here](http://mxnet.readthedocs.io/en/latest/packages/r/mnistCompetition.html).
## Data Loading
diff --git a/amalgamation/Makefile b/amalgamation/Makefile
index 1111305b5bba..c23210a1a8b5 100644
--- a/amalgamation/Makefile
+++ b/amalgamation/Makefile
@@ -1,14 +1,14 @@
export MXNET_ROOT=`pwd`/..
# Change this to path of openblas
-export OPENBLAS_ROOT=`pwd`/OpenBLAS
+export OPENBLAS_ROOT=${MXNET_ROOT}/../OpenBLAS
# Whether use minimum build without blas and SSE, this will make the library super slow
ifndef MIN
- export MIN= 0
+ export MIN=0
endif
ifndef ANDROID
- export ANDROID=0
+ export ANDROID=0
endif
@@ -17,8 +17,8 @@ endif
CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall
ifneq ($(MIN), 1)
- CFLAGS+= -I${OPENBLAS_ROOT}/include
- LDFLAGS+=-L${OPENBLAS_ROOT}/lib -lopenblas
+ CFLAGS += -I${OPENBLAS_ROOT} -I${OPENBLAS_ROOT}/include
+ LDFLAGS+= -L${OPENBLAS_ROOT} -L${OPENBLAS_ROOT}/lib -lopenblas
endif
@@ -68,4 +68,4 @@ ${MXNET_ROOT}/lib/libmxnet_predict.so: mxnet_predict-all.o
ls -alh $@
clean:
- rm -f *.d *.o
+ rm -f *.d *.o *.so *.a mxnet_predict-all.cc
diff --git a/amalgamation/README.md b/amalgamation/README.md
index f96a11d7ed57..9d4e3fe9c8a3 100644
--- a/amalgamation/README.md
+++ b/amalgamation/README.md
@@ -24,7 +24,7 @@ This module is created by [Jack Deng](https://github.com/jdeng).
Android
---------------
-Setup NDK and build your standalone toolchain. [Instructions](http://developer.android.com/ndk/guides/standalone_toolchain.html#itc) Use the Advanced Method!!! In particular set PATH, CC and CXX.
+Setup NDK and build your standalone toolchain. [Instructions](http://developer.android.com/ndk/guides/standalone_toolchain.html#itc) Use the Advanced Method!!! In particular set PATH, CC and CXX. The minimum API level required is 16.
Example:
```
@@ -33,7 +33,7 @@ export CC=arm-linux-androideabi-gcc # or export CC=arm-linux-androideabi-clang
export CXX=arm-linux-androideabi-g++ # or export CXX=arm-linux-androideabi-clang++
```
-Build OpenBlas for Android: [Build OpenBlas](https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android)
+Build OpenBLAS for Android: [Build OpenBLAS](https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android) Please put OpenBLAS source code outside mxnet directory.
Modify OPENBLAS_ROOT in Makefile
Type ```make ANDROID=1```
diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py
index 9016db58535b..be854180ceb1 100644
--- a/amalgamation/amalgamation.py
+++ b/amalgamation/amalgamation.py
@@ -30,14 +30,15 @@ def get_sources(def_file):
sources = []
files = []
visited = set()
+ mxnet_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
for line in open(def_file):
files = files + line.strip().split(' ')
for f in files:
f = f.strip()
- if not f or f.endswith('.o') or f == '\\': continue
+ if not f or f.endswith('.o:') or f == '\\': continue
fn = os.path.relpath(f)
- if fn.find('/usr/') < 0 and fn not in visited:
+ if os.path.abspath(f).startswith(mxnet_path) and fn not in visited:
sources.append(fn)
visited.add(fn)
return sources
diff --git a/dmlc-core b/dmlc-core
index 9fd3b48462a7..755f577a38cf 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 9fd3b48462a7a651e12a197679f71e043dcb25a2
+Subproject commit 755f577a38cf3aa07f38a2667ffc583d22195e52
diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile
index 10311cb31bf8..1e5a956450dc 100644
--- a/docker/cpu/Dockerfile
+++ b/docker/cpu/Dockerfile
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y build-essential git libopenblas-dev lib
RUN git clone --recursive https://github.com/dmlc/mxnet/ && cd mxnet && \
cp make/config.mk . && \
echo "USE_BLAS=openblas" >>config.mk && \
- make -j8
+ make -j$(nproc)
# python pakcage
RUN apt-get install -y python-numpy wget unzip
diff --git a/docker/cuda/Dockerfile b/docker/cuda/Dockerfile
index 8796b70aa1c9..fff84352bf16 100644
--- a/docker/cuda/Dockerfile
+++ b/docker/cuda/Dockerfile
@@ -9,7 +9,7 @@ RUN git clone --recursive https://github.com/dmlc/mxnet/ && cd mxnet && \
echo "USE_CUDA_PATH=/usr/local/cuda" >>config.mk && \
echo "USE_CUDNN=1" >>config.mk && \
echo "USE_BLAS=openblas" >>config.mk && \
- make -j8 ADD_LDFLAGS=-L/usr/local/cuda/lib64/stubs
+ make -j$(nproc) ADD_LDFLAGS=-L/usr/local/cuda/lib64/stubs
ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:$LD_LIBRARY_PATH
# python pakcage
diff --git a/docs/_static/js/auto_module_index.js b/docs/_static/js/auto_module_index.js
index b918ecdc1635..e0238ed391f8 100644
--- a/docs/_static/js/auto_module_index.js
+++ b/docs/_static/js/auto_module_index.js
@@ -21,5 +21,4 @@ function auto_index(module) {
html += "";
li_node.append(html);
});
-}
-
+}
\ No newline at end of file
diff --git a/docs/_static/mxnet-theme/index.html b/docs/_static/mxnet-theme/index.html
index e0898a8b0567..a0901e42783d 100644
--- a/docs/_static/mxnet-theme/index.html
+++ b/docs/_static/mxnet-theme/index.html
@@ -1,8 +1,12 @@
-
+
-
Flexible and Efficient Library for Deep Learning
+
Flexible and Efficient Library for Deep Learning
+
+
-
-
+
+
diff --git a/docs/_static/mxnet-theme/layout.html b/docs/_static/mxnet-theme/layout.html
index 62ebf399e936..1f444c640dbf 100644
--- a/docs/_static/mxnet-theme/layout.html
+++ b/docs/_static/mxnet-theme/layout.html
@@ -56,7 +56,7 @@
};
- {% for name in ['jquery.js', 'underscore.js', 'doctools.js', 'searchtools.js'] %}
+ {% for name in ['jquery.js', 'underscore.js', 'doctools.js', 'searchtools.js', 'selectlang.js'] %}
{% endfor %}
@@ -76,7 +76,7 @@
{%- endif %}
-
+
{%- endmacro %}
diff --git a/docs/_static/mxnet-theme/navbar.html b/docs/_static/mxnet-theme/navbar.html
index 587665f7912c..127cb40b2045 100644
--- a/docs/_static/mxnet-theme/navbar.html
+++ b/docs/_static/mxnet-theme/navbar.html
@@ -1,3 +1,4 @@
+
+
+
+
+Previous Navbar Layout End -->
+
+
diff --git a/docs/_static/mxnet.css b/docs/_static/mxnet.css
index f4862a706b9b..83de570c5683 100644
--- a/docs/_static/mxnet.css
+++ b/docs/_static/mxnet.css
@@ -1,6 +1,300 @@
+/* basic style */
+a, abbr, acronym, address, applet, big, blockquote, body, caption, cite, code, dd, del, dfn, div, dl, dt, em, fieldset, form, h1, h2, h3, h4, h5, h6, html, iframe, img, ins, kbd, label, legend, li, object, ol, p, q, s, samp, small, span, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, ul, var {
+ margin: 0;
+ padding: 0;
+ border: 0;
+ outline: 0;
+ font-weight: inherit;
+ font-style: inherit;
+ font-family: inherit;
+ font-size: 100%;
+ vertical-align: baseline
+}
+
+body {
+ background: #fff;
+ color: #000;
+ font-family: Lato, Helvetica, "Helvetica Neue", Arial, sans-serif;
+ font-size: 16px;
+ font-weight: 400;
+ line-height: 1.6;
+ text-rendering: optimizeLegibility;
+ -webkit-font-smoothing: antialiased;
+ -moz-osx-font-smoothing: grayscale;
+}
+
+ol,
+ul {
+ list-style: none
+}
+
+table {
+ border-collapse: separate;
+ border-spacing: 0
+}
+
+caption,
+table,
+td,
+th {
+ vertical-align: middle
+}
+
+caption,
+td,
+th {
+ text-align: left;
+ font-weight: 400
+}
+
+a:hover,
+a:focus,
+a:active {
+ text-decoration: none;
+}
+
+a img {
+ border: none
+}
+
+
+
+html {
+ box-sizing: border-box;
+}
+
+*,
+:after,
+:before {
+ box-sizing: inherit
+}
+
+button::-moz-focus-inner,
+input[type=button]::-moz-focus-inner,
+input[type=reset]::-moz-focus-inner,
+input[type=submit]::-moz-focus-inner {
+ padding: 0;
+ margin: 0;
+ border: 0
+}
+
+button,
+input,
+select {
+ margin: 0;
+ padding: 0;
+ border: 0
+}
+
+@media screen {
+ body,
+ html {
+ height: 100%;
+ }
+}
+/* basic end */
+
+/*** code style ***/
+/*code block style*/
+.highlight {
+ border-radius: 4px;
+}
+
+pre {
+ border: 0;
+ line-height: 1.6;
+ margin: 0 0 16px;
+ padding: 10px 16px;
+ word-break: break-all;
+ word-wrap: break-word;
+}
+
+/*code inline style*/
+code.docutils, code.literal {
+ padding: 3px 5px;
+}
+/*** code style end ***/
+
+body > .container {
+ padding-top: 80px
+}
+
/* header section */
-.splash{
- padding:5em 0 1em 0;
+/* navbar */
+.navbar {
+ background-color:#0079b2;
+ opacity: 0.9;
+ border: 0px;
+ height: 60px;
+ padding: 0 80px;
+ margin-bottom: 0px;
+}
+
+#header-inner {
+ display: -webkit-box;
+ display: -webkit-flex;
+ display: -ms-flexbox;
+ display: box;
+ display: flex;
+ -webkit-box-orient: horizontal;
+ -moz-box-orient: horizontal;
+ -webkit-box-lines: single;
+ -moz-box-lines: single;
+ -webkit-flex-flow: row nowrap;
+ -ms-flex-flow: row nowrap;
+ flex-flow: row;
+ -webkit-box-align: center;
+ -ms-flex-align: center;
+ -webkit-align-items: center;
+ align-items: center
+}
+
+@media screen and (max-width: 768px) {
+ #header-inner {
+ -webkit-box-pack: center;
+ -ms-flex-pack: center;
+ -webkit-justify-content: center;
+ justify-content: center
+ }
+}
+
+#logo-wrap {
+ -webkit-box-flex: 1;
+ box-flex: 1;
+ -webkit-flex: 0 50px;
+ -ms-flex: 0 50px;
+ flex: 0 50px
+}
+
+#logo {
+ width: 150px;
+ display: block;
+ float: left;
+ height: 60px;
+ padding: 10px 0 0 0;
+}
+
+#logo > img {
+ display: block;
+ width: 110px;
+}
+
+#main-nav {
+ display: none;
+ -webkit-box-flex: 1;
+ box-flex: 1;
+ -webkit-flex: 1 auto;
+ -ms-flex: 1 auto;
+ flex: 1 auto;
+}
+
+@media screen and (min-width:769px) {
+ #main-nav {
+ display: block
+ }
+}
+
+.main-nav-link {
+ color: #fff;
+ text-decoration: none;
+ line-height: 50px;
+ opacity: .7;
+ -webkit-transition: .2s;
+ transition: .2s;
+ font-family: Lato, "Helvetica Neue", Helvetica, Arial, sans-serif;
+ display: inline-block;
+ padding: 0 15px
+}
+
+.main-nav-link:hover {
+ opacity: 1;
+ color: #1094e8;
+ text-decoration: none;
+}
+
+#dropdown-menu-position-anchor {
+ position: relative;
+}
+
+#package-dropdown-menu {
+ top: 36px;
+ border-radius: 4px;
+ padding: 0;
+}
+
+#package-dropdown-menu > li > a {
+ color: #0079b2;
+ padding: 6px 16px;
+
+}
+
+#search-input-wrap {
+ display: none;
+ padding-left: 6px;
+ padding-bottom: 8px;
+ border-bottom: 1px solid #999
+}
+
+#search-input-icon,
+#search-input-wrap.on {
+ display: inline-block
+}
+
+#search-input-icon {
+ color: #fff;
+ padding-right: .5em;
+ opacity: .7
+}
+
+#search-input {
+ background: none;
+ font-size: inherit;
+ font-family: Lato, Helvetica Neue, Helvetica, Arial, sans-serif;
+ color: #fff;
+ outline: none;
+ -webkit-appearance: none
+}
+
+#lang-select-wrap {
+ display: none;
+ position: relative
+}
+
+@media screen and (min-width:769px) {
+ #lang-select-wrap {
+ display: block
+ }
+}
+
+#lang-select-label {
+ color: #fff;
+ opacity: .7;
+ font-family: Lato, Helvetica Neue, Helvetica, Arial, sans-serif;
+ line-height: 50px
+}
+
+#lang-select-label span {
+ padding-left: 8px
+}
+
+#lang-select-label i {
+ opacity: .7
+}
+
+#lang-select {
+ opacity: 0;
+ position: absolute;
+ top: 0;
+ left: 0;
+ width: 100%;
+ height: 100%;
+ -webkit-appearance: menulist-button;
+ font-size: inherit
+}
+
+/* banner */
+#splash{
+ padding:60px 0 0 0;
background-color:#0079b2;
/* background-image:url(../img/bg.jpg); */
background-size:cover;
@@ -9,27 +303,59 @@
text-align:center
}
-.splash h1{
+#splash #banner {
+ text-align: center
+}
+
+#splash #banner-title {
+ padding: 20px 0 10px 0;
+ font-size: 40px;
+ line-height: 1.15;
+ font-weight: 300;
+ font-family: Lato, Helvetica Neue, Helvetica, Arial, sans-serif;
+}
+
+@media screen and (min-width:769px) {
+ #splash #banner-title {
+ padding-top: 100px;
+ }
+}
+
+#splash h1{
font-size: 40px;
margin-bottom: 20px;
}
-.splash .social{
- margin:2em 0
+
+#splash #social{
+ margin:2em 0 4em 0;
}
-.splash .get_start {
+#splash #get_start {
margin:2em 0
}
-.splash .get_start_btn {
- border: 2px solid #FFFFFF;
- border-radius: 5px;
- color: #FFFFFF;
- display: inline-block;
- font-size: 26px;
- padding: 9px 20px;
+#splash #get_start_btn {
+ border: 1.8px solid #FFFFFF;
+ border-radius: 2px;
+ color: #FFFFFF;
+ display: inline-block;
+ font-size: 22px;
+ font-family: Helvetica, Helvetica Neue, Arial, sans-serif;
+ padding: 8px 20px;
+ -webkit-transition: .2s;
+ transition: .2s;
+}
+
+#splash #get_start_btn:hover {
+ background-color: #FFFFFF;
+ color: #0079b2;
+ opacity: 0.9;
}
+
+
+
+
.section-tout{
padding:3em 0 3em;
border-bottom:1px solid rgba(0,0,0,.05);
@@ -112,45 +438,11 @@ div.sphinxsidebar ul ul { margin-left: 15px }
padding-right: 15px
}
-body>.container {
- padding-top: 80px
-}
-body {
- font-size: 16px;
-}
-pre {
- font-size: 14px;
-}
-/* navbar */
-.navbar {
- background-color:#0079b2;
- border: 0px;
- height: 65px;
-}
-.navbar-right li {
- display:inline-block;
- vertical-align:top;
- padding: 22px 4px;
-}
-
-.navbar-left li {
- display:inline-block;
- vertical-align:top;
- padding: 17px 10px;
- /* margin: 0 5px; */
-}
+/*embed end */
-.navbar-left li a {
- font-size: 22px;
- color: #fff;
-}
-
-.navbar-left > li > a:hover{
- color:#fff;
-}
.flag-icon {
background-size: contain;
background-position: 50%;
@@ -179,18 +471,7 @@ pre {
/* padding: 10px; */
/* } */
-.navbar-brand >img {
- width: 110px;
-}
-.dropdown-menu li {
- padding: 0px 0px;
- width: 120px;
-}
-.dropdown-menu li a {
- color: #0079b2;
- font-size: 20px;
-}
.section h1 {
padding-top: 90px;
diff --git a/docs/_static/selectlang.js b/docs/_static/selectlang.js
new file mode 100644
index 000000000000..25337abcb22b
--- /dev/null
+++ b/docs/_static/selectlang.js
@@ -0,0 +1,25 @@
+function changeLanguage(langSelect, langSelectLabel, rootpath){
+ langSelect.change(function() {
+ var lang = langSelect.val();
+ if(lang == 'zh'){
+ location.href = rootpath + 'zh/index.html';
+ } else {
+ location.href = rootpath + 'index.html';
+ }
+ });
+}
+
+$(document).ready(function () {
+ var langSelect = $("#lang-select");
+ var langSelectLabel = $("#lang-select-label > span");
+ currHref = location.href;
+
+ if(/\/zh\//.test(currHref)){
+ langSelect.val("zh");
+ } else {
+ langSelect.val("en");
+ }
+ langSelectLabel.text($("option:selected").text());
+
+ changeLanguage(langSelect, langSelectLabel, getRootPath());
+})
\ No newline at end of file
diff --git a/docs/how_to/build.md b/docs/how_to/build.md
index 458493c70672..4ef8ac5420b7 100644
--- a/docs/how_to/build.md
+++ b/docs/how_to/build.md
@@ -59,7 +59,7 @@ sudo apt-get install -y build-essential git libatlas-base-dev libopencv-dev
Then build mxnet
```bash
git clone --recursive https://github.com/dmlc/mxnet
-cd mxnet; make -j4
+cd mxnet; make -j$(nproc)
```
### Building on OSX
@@ -77,9 +77,21 @@ Then build mxnet
```bash
git clone --recursive https://github.com/dmlc/mxnet
-cd mxnet; cp make/osx.mk ./config.mk; make -j4
+cd mxnet; cp make/osx.mk ./config.mk; make -j$(sysctl -n hw.ncpu)
```
+Or use cmake command and Xcode
+
+```bash
+mkdir build; cd build
+cmake -G Xcode -DCMAKE_BUILD_TYPE=Release -DCMAKE_CONFIGURATION_TYPES="Release" -DUSE_OPENMP="OFF" -DUSE_CUDNN="OFF" -DUSE_CUDA="OFF" -DBLAS=MKL ..
+```
+
+Then open `mxnet.xcodeproj` by xcode and change two flags in `Build Settings` before building:
+(1) Link-Time Optimization = Yes
+(2) Optimization Level = Fastest [-O3]
+
+
Troubleshooting:
Some of the users might meet the link error `ld: library not found for -lgomp`, indicating that the GNU implementation of OpenMP is not in the library path of operating system.
@@ -95,7 +107,7 @@ ln -s path1 /usr/local/lib/libgomp.dylib
```
-then run `make -j4` again.
+then run `make -j$(sysctl -n hw.ncpu)` again.
### Building on Windows
@@ -231,6 +243,37 @@ Now you should have the R package as a tar.gz file and you can install it as a n
R CMD INSTALL mxnet_0.5.tar.gz
```
+If you can't load `mxnet` after enabling CUDA during the installation. Please add following lines into `$RHOME/etc/ldpaths`. You can find your `$RHOME` by using `R.home()` inside R.
+
+```bash
+export CUDA_HOME=/usr/local/cuda
+export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+```
+
+To install the package using GPU on Windows without building the package from scratch. Note that you need a couple of programs installed already:
+- You'll need the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit). This depends on Visual Studio, and a free compatible version would be [Visual Studio Community 2013](https://www.visualstudio.com/en-us/news/vs2013-community-vs.aspx). For instructions and compatibility checks, read http://docs.nvidia.com/cuda/cuda-getting-started-guide-for-microsoft-windows/ .
+
+- You will also need to register as a developer at nvidia and download CUDNN V3, https://developer.nvidia.com/cudnn .
+
+
+1. Download the mxnet package as a ZIP from the Github repository https://github.com/dmlc/mxnet and unpack it. You will be editing the `/mxnet/R-package` folder.
+
+2. Download the most recent GPU-enabled package from the [Releases tab](https://github.com/dmlc/mxnet/releases). Unzip this file so you have a folder `/nocudnn`. Note that this file and the folder you'll save it in will be used for future reference and not directly for installing the package. Only some files will be copied from it into the `R-package` folder.
+
+(Note: you now have 2 folders we're working with, possibly in different locations, that we'll reference with `R-package/` and `nocudnn/`.)
+
+3. Download CUDNN V3 from https://developer.nvidia.com/cudnn. Unpack the .zip file and you'll see 3 folders, `/bin`, `/include`, `/lib`. Copy and replace these 3 folders into `nocudnn/3rdparty/cudnn/`, or unpack the .zip file there directly.
+
+4. Create the folder `R-package/inst/libs/x64`. We only support 64-bit operating system now, so you need the x64 folder;
+
+5. Put dll files in `R-package/inst/libs/x64`.
+
+The first dll file you need is `nocudnn/lib/libmxnet.dll`. The other dll files you need are the ones in all 4 subfolders of `nocudnn/3rdparty/`, for the `cudnn` and `openblas` you'll need to look in the `/bin` folders. There should be 11 dll files now in `R-package/inst/libs/x64`.
+
+6. Copy the folder `nocudnn/include/` to `R-package/inst/`. So now you should have a folder `R-package/inst/include/` with 3 subfolders.
+
+7. Run `R CMD INSTALL --no-multiarch R-package`. Make sure that R is added to your PATH in Environment Variables. Running the command `Where R` in Command Prompt should return the location.
+
Note on Library Build:
We isolate the library build with Rcpp end to maximize the portability
diff --git a/docs/how_to/cloud.md b/docs/how_to/cloud.md
index 1f8bfe9907be..26cda6ab8cc0 100644
--- a/docs/how_to/cloud.md
+++ b/docs/how_to/cloud.md
@@ -29,7 +29,7 @@ There are several ways to upload local data to S3. One simple way is using
[s3cmd](http://s3tools.org/s3cmd). For example:
```bash
-wget http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip
+wget http://data.dmlc.ml/mxnet/data/mnist.zip
unzip mnist.zip && s3cmd put t*-ubyte s3://dmlc/mnist/
```
@@ -72,7 +72,7 @@ echo "USE_CUDNN=1" >>config.mk
echo "USE_BLAS=atlas" >> config.mk
echo "USE_DIST_KVSTORE = 1" >>config.mk
echo "USE_S3=1" >>config.mk
-make -j8
+make -j$(nproc)
```
To test whether everything is installed properly, we train a Convolutional neural network on MNIST using a GPU:
diff --git a/docs/how_to/env_var.md b/docs/how_to/env_var.md
index c63ba7a12a53..d15e11386bde 100644
--- a/docs/how_to/env_var.md
+++ b/docs/how_to/env_var.md
@@ -31,6 +31,9 @@ Usually you do not need to change these settings, but they are listed here for r
* MXNET_KVSTORE_BIGARRAY_BOUND (default=1e6)
- The minimum size of "big array".
- When the array size is bigger than this threshold, MXNET_KVSTORE_REDUCTION_NTHREADS threads will be used for reduction.
+* MXNET_CUDNN_AUTOTUNE_DEFAULT (default=0)
+ - The default value of cudnn_tune for convolution layers.
+ - Auto tuning is turned off by default. Set to 1 to turn it on by default for benchmarking.
Settings for Minimum Memory Usage
---------------------------------
diff --git a/docs/how_to/multi_devices.md b/docs/how_to/multi_devices.md
index a6611ad971bb..95ad8f8ba120 100644
--- a/docs/how_to/multi_devices.md
+++ b/docs/how_to/multi_devices.md
@@ -140,7 +140,7 @@ start a job by using `ssh`, `mpi`, `sge`, or `yarn`.
Assume we are at the directory `mxnet/example/image-classification`. and want
to train mnist with lenet by using
-[train_mnist.py](https://github.com/dmlc/mxnet/blob/master/example/image-classification/train_mnist.py]).
+[train_mnist.py](https://github.com/dmlc/mxnet/blob/master/example/image-classification/train_mnist.py).
On a single machine we can run by
```bash
diff --git a/docs/packages/python/index.md b/docs/packages/python/index.md
index a9f3a0f2bac5..aa22ebcd2dce 100644
--- a/docs/packages/python/index.md
+++ b/docs/packages/python/index.md
@@ -1,7 +1,7 @@
MXNet Python Package
====================
This page contains links to all the python related documents on python package.
-To install the package package, checkout [Build and Installation Instruction](../../how_to/build.md).
+To install the python package, checkout [Build and Installation Instruction](../../how_to/build.md).
There are three types of documents you can find about mxnet.
* [Tutorials](#tutorials) are self contained materials that introduces a certain use-cases of mxnet.
diff --git a/docs/packages/r/CallbackFunctionTutorial.md b/docs/packages/r/CallbackFunctionTutorial.md
index c27e009fea7c..c60833a75bcd 100644
--- a/docs/packages/r/CallbackFunctionTutorial.md
+++ b/docs/packages/r/CallbackFunctionTutorial.md
@@ -6,7 +6,7 @@ which can very useful in model training.
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/CallbackFunctionTutorial.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/CallbackFunctionTutorial.html)
- You can find the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/CallbackFunctionTutorial.Rmd)
diff --git a/docs/packages/r/CharRnnModel.Rmd b/docs/packages/r/CharRnnModel.md
similarity index 65%
rename from docs/packages/r/CharRnnModel.Rmd
rename to docs/packages/r/CharRnnModel.md
index 9066d60f7513..201301c7981b 100644
--- a/docs/packages/r/CharRnnModel.Rmd
+++ b/docs/packages/r/CharRnnModel.md
@@ -3,35 +3,46 @@ Char RNN Example
This example aims to show how to use lstm model to build a char level language model, and generate text from it. We use a tiny shakespeare text for demo purpose.
-Data can be found at https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare.
+Data can be found [here](https://github.com/dmlc/web-data/tree/master/mxnet/tinyshakespeare).
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/package/r/CharRnnModel.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/CharRnnModel.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/CharRnnModel.Rmd)
Load Data
---------
First of all, load in the data and preprocess it.
-```{r}
+
+```r
require(mxnet)
```
+
+```
+## Loading required package: mxnet
+```
+
+```
+## Loading required package: methods
+```
Set basic network parameters.
-```{r}
+
+```r
batch.size = 32
seq.len = 32
-num.hidden = 256
-num.embed = 256
-num.lstm.layer = 2
-num.round = 3
+num.hidden = 16
+num.embed = 16
+num.lstm.layer = 1
+num.round = 1
learning.rate= 0.1
wd=0.00001
clip_gradient=1
update.period = 1
```
download the data.
-```{r}
+
+```r
download.data <- function(data_dir) {
dir.create(data_dir, showWarnings = FALSE)
if (!file.exists(paste0(data_dir,'input.txt'))) {
@@ -41,7 +52,8 @@ download.data <- function(data_dir) {
}
```
Make dictionary from text.
-```{r}
+
+```r
make.dict <- function(text, max.vocab=10000) {
text <- strsplit(text, '')
dic <- list()
@@ -59,7 +71,8 @@ make.dict <- function(text, max.vocab=10000) {
}
```
Transfer text into data feature.
-```{r}
+
+```r
make.data <- function(file.path, seq.len=32, max.vocab=10000, dic=NULL) {
fi <- file(file.path, "r")
text <- paste(readLines(fi), collapse="\n")
@@ -92,7 +105,8 @@ make.data <- function(file.path, seq.len=32, max.vocab=10000, dic=NULL) {
}
```
Move tail text.
-```{r}
+
+```r
drop.tail <- function(X, batch.size) {
shape <- dim(X)
nstep <- as.integer(shape[2] / batch.size)
@@ -100,7 +114,8 @@ drop.tail <- function(X, batch.size) {
}
```
get the label of X
-```{r}
+
+```r
get.label <- function(X) {
label <- array(0, dim=dim(X))
d <- dim(X)[1]
@@ -114,9 +129,17 @@ get.label <- function(X) {
}
```
get training data and eval data
-```{r}
+
+```r
download.data("./data/")
ret <- make.data("./data/input.txt", seq.len=seq.len)
+```
+
+```
+## Total unique char: 65
+```
+
+```r
X <- ret$data
dic <- ret$dic
lookup.table <- ret$lookup.table
@@ -143,7 +166,8 @@ Training Model
--------------
In `mxnet`, we have a function called `mx.lstm` so that users can build a general lstm model.
-```{r}
+
+```r
model <- mx.lstm(X.train, X.val,
ctx=mx.cpu(),
num.round=num.round,
@@ -159,39 +183,49 @@ model <- mx.lstm(X.train, X.val,
learning.rate=learning.rate,
wd=wd,
clip_gradient=clip_gradient)
+```
```
-Setting the parameters ctx=mx.gpu(0) and num.round=5 can get the following result.
+## Epoch [31] Train: NLL=3.53787130224343, Perp=34.3936275728271
+## Epoch [62] Train: NLL=3.43087958036949, Perp=30.903813186055
+## Epoch [93] Train: NLL=3.39771238228587, Perp=29.8956319855751
+## Epoch [124] Train: NLL=3.37581711716687, Perp=29.2481732041015
+## Epoch [155] Train: NLL=3.34523331338447, Perp=28.3671933405139
+## Epoch [186] Train: NLL=3.30756356274787, Perp=27.31848454823
+## Epoch [217] Train: NLL=3.25642968403829, Perp=25.9566978956055
+## Epoch [248] Train: NLL=3.19825967486207, Perp=24.4898727477925
+## Epoch [279] Train: NLL=3.14013971549828, Perp=23.1070950525017
+## Epoch [310] Train: NLL=3.08747601837462, Perp=21.9216781782189
+## Epoch [341] Train: NLL=3.04015595674863, Perp=20.9085038031042
+## Epoch [372] Train: NLL=2.99839339255659, Perp=20.0532932584534
+## Epoch [403] Train: NLL=2.95940091012609, Perp=19.2864139984503
+## Epoch [434] Train: NLL=2.92603311380224, Perp=18.6534872738302
+## Epoch [465] Train: NLL=2.89482756896395, Perp=18.0803835531869
+## Epoch [496] Train: NLL=2.86668230478397, Perp=17.5786009078994
+## Epoch [527] Train: NLL=2.84089368534943, Perp=17.1310684830416
+## Epoch [558] Train: NLL=2.81725862932279, Perp=16.7309220880514
+## Epoch [589] Train: NLL=2.79518870141492, Perp=16.3657166956952
+## Epoch [620] Train: NLL=2.77445683225304, Perp=16.0299176962855
+## Epoch [651] Train: NLL=2.75490970113174, Perp=15.719621374694
+## Epoch [682] Train: NLL=2.73697900634351, Perp=15.4402696117257
+## Epoch [713] Train: NLL=2.72059739336781, Perp=15.1893935780915
+## Epoch [744] Train: NLL=2.70462837571585, Perp=14.948760335793
+## Epoch [775] Train: NLL=2.68909904683828, Perp=14.7184093476224
+## Epoch [806] Train: NLL=2.67460054451836, Perp=14.5065539595711
+## Epoch [837] Train: NLL=2.66078997776751, Perp=14.3075873113043
+## Epoch [868] Train: NLL=2.6476781639279, Perp=14.1212134100373
+## Epoch [899] Train: NLL=2.63529039846876, Perp=13.9473621677371
+## Epoch [930] Train: NLL=2.62367693518974, Perp=13.7863219168709
+## Epoch [961] Train: NLL=2.61238282674384, Perp=13.6314936713501
+## Iter [1] Train: Time: 10301.6818172932 sec, NLL=2.60536539345356, Perp=13.5361704272949
+## Iter [1] Val: NLL=2.26093848746227, Perp=9.59208699731232
```
-Epoch [31] Train: NLL=3.47213018872144, Perp=32.2052727363657
-...
-Epoch [961] Train: NLL=2.32060007657895, Perp=10.181782322355
-Iter [1] Train: Time: 186.397065639496 sec, NLL=2.31135356537961, Perp=10.0880702804858
-Iter [1] Val: NLL=1.94184484060012, Perp=6.97160060607419
-Epoch [992] Train: NLL=1.84784553299322, Perp=6.34613225095329
-...
-Epoch [1953] Train: NLL=1.70175791172558, Perp=5.48357857093351
-Iter [2] Train: Time: 188.929051160812 sec, NLL=1.70103940328978, Perp=5.47963998859367
-Iter [2] Val: NLL=1.74979316010449, Perp=5.75341251767988
-...
-Epoch [2914] Train: NLL=1.54738185300295, Perp=4.69915099483974
-Iter [3] Train: Time: 185.425321578979 sec, NLL=1.54604189517013, Perp=4.69285854740519
-Iter [3] Val: NLL=1.67780240235925, Perp=5.35377758479576
-Epoch [2945] Train: NLL=1.48868466087876, Perp=4.43126307034767
-...
-Iter [4] Train: Time: 185.487086296082 sec, NLL=1.4744973925858, Perp=4.36883940994296
-Iter [4] Val: NLL=1.64488167325603, Perp=5.18039689118454
-Epoch [3937] Train: NLL=1.46355541021581, Perp=4.32129622881604
-...
-Epoch [4898] Train: NLL=1.42900458455642, Perp=4.17454171976281
-Iter [5] Train: Time: 185.070136785507 sec, NLL=1.42909226256273, Perp=4.17490775130428
-Iter [5] Val: NLL=1.62716655804022, Perp=5.08943365437187
-```
Inference from model
--------------------
helper function for random sample.
-```{r}
+
+```r
cdf <- function(weights) {
total <- sum(weights)
result <- c()
@@ -224,16 +258,14 @@ choice <- function(weights) {
}
```
we can use random output or fixed output by choosing largest probability.
-```{r}
-make.output <- function(prob, sample=FALSE, temperature=1.) {
+
+```r
+make.output <- function(prob, sample=FALSE) {
if (!sample) {
idx <- which.max(as.array(prob))
}
else {
- scale_prob <- mx.nd.clip(prob, 1e-6, 1 - 1e-6)
- rescale <- mx.nd.exp(mx.nd.log(scale_prob) / temperature)
- rescale <- rescale / (as.array(mx.nd.sum(rescale))[1])
- idx <- choice(rescale)
+ idx <- choice(prob)
}
return (idx)
@@ -242,7 +274,8 @@ make.output <- function(prob, sample=FALSE, temperature=1.) {
In `mxnet`, we have a function called `mx.lstm.inference` so that users can build a inference from lstm model and then use function `mx.lstm.forward` to get forward output from the inference.
Build inference from model.
-```{r}
+
+```r
infer.model <- mx.lstm.inference(num.lstm.layer=num.lstm.layer,
input.size=vocab,
num.hidden=num.hidden,
@@ -252,7 +285,7 @@ infer.model <- mx.lstm.inference(num.lstm.layer=num.lstm.layer,
ctx=mx.cpu())
```
generate a sequence of 75 chars using function `mx.lstm.forward`.
-```
+```r
start <- 'a'
seq.len <- 75
random.sample <- TRUE
@@ -273,4 +306,10 @@ The result:
```
ah not a drobl greens
Settled asing lately sistering sounted to their hight
-```
\ No newline at end of file
+```
+
+Other RNN models
+----------------
+In `mxnet`, other RNN models like custom RNN and GRU are also provided.
+- For **custom RNN model**, you can replace `mx.lstm` with `mx.rnn` to train rnn model. Also, you can replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.rnn.inference` and `mx.rnn.forward` to inference from rnn model and get forward result from the inference model.
+- For **GRU model**, you can replace `mx.lstm` with `mx.gru` to train gru model. Also, you can replace `mx.lstm.inference` and `mx.lstm.forward` with `mx.gru.inference` and `mx.gru.forward` to inference from gru model and get forward result from the inference model.
diff --git a/docs/packages/r/classifyRealImageWithPretrainedModel.md b/docs/packages/r/classifyRealImageWithPretrainedModel.md
index 6d73bb409bce..7bc5fec1a08f 100644
--- a/docs/packages/r/classifyRealImageWithPretrainedModel.md
+++ b/docs/packages/r/classifyRealImageWithPretrainedModel.md
@@ -6,13 +6,13 @@ algorithm can do is to classify real world images.
In this example we will show how to use a pretrained Inception-BatchNorm Network to predict the class of
real world image. The network architecture is decribed in [1].
-The pre-trained Inception-BatchNorm network is able to be downloaded from [this link](http://webdocs.cs.ualberta.ca/~bx3/data/Inception.zip)
+The pre-trained Inception-BatchNorm network is able to be downloaded from [this link](http://data.dmlc.ml/mxnet/data/Inception.zip)
This model gives the recent state-of-art prediction accuracy on image net dataset.
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/classifyRealImageWithPretrainedModel.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/classifyRealImageWithPretrainedModel.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/classifyRealImageWithPretrainedModel.Rmd)
Pacakge Loading
@@ -112,7 +112,7 @@ preproc.image <- function(im, mean.image) {
shape <- dim(im)
short.edge <- min(shape[1:2])
xx <- floor((shape[1] - short.edge) / 2)
- yy <- floor((shape[2] - short.edge) / 2)
+ yy <- floor((shape[2] - short.edge) / 2)
croped <- crop.borders(im, xx, yy)
# resize to 224 x 224, needed by input of the model.
resized <- resize(croped, 224, 224)
diff --git a/docs/packages/r/fiveMinutesNeuralNetwork.md b/docs/packages/r/fiveMinutesNeuralNetwork.md
index 1d56c7984d7d..6102eaee7569 100644
--- a/docs/packages/r/fiveMinutesNeuralNetwork.md
+++ b/docs/packages/r/fiveMinutesNeuralNetwork.md
@@ -8,7 +8,7 @@ We will show you how to do classification and regression tasks respectively. The
Preface
-------
This tutorial is written in Rmarkdown.
-- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.org/en/latest/R-package/fiveMinutesNeuralNetwork.html)
+- You can directly view the hosted version of the tutorial from [MXNet R Document](http://mxnet.readthedocs.io/en/latest/packages/r/fiveMinutesNeuralNetwork.html)
- You can find the download the Rmarkdown source from [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd)
## Classification
diff --git a/docs/packages/r/index.md b/docs/packages/r/index.md
index ef427abc4899..829ca1d995e7 100644
--- a/docs/packages/r/index.md
+++ b/docs/packages/r/index.md
@@ -20,7 +20,7 @@ Tutorials
* [Handwritten Digits Classification Competition](mnistCompetition.md)
* [Tutorial on NDArray and Symbol](ndarrayAndSymbolTutorial.md)
* [Tutorial on Callback Functions](CallbackFunctionTutorial.md)
-* [Character Language Model using RNN Model](CharRnnModel.Rmd)
+* [Character Language Model using RNN Model](CharRnnModel.md)
Resources
---------
diff --git a/docs/packages/r/mnistCompetition.md b/docs/packages/r/mnistCompetition.md
index 4a0a0d71f854..a84ecb5ec326 100644
--- a/docs/packages/r/mnistCompetition.md
+++ b/docs/packages/r/mnistCompetition.md
@@ -5,7 +5,7 @@ Handwritten Digits Classification Competition
We will present the basic usage of [mxnet](https://github.com/dmlc/mxnet/tree/master/R-package) to compete in this challenge.
This tutorial is written in Rmarkdown. You can download the source [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/mnistCompetition.Rmd) and view a
-hosted version of tutorial [here](http://mxnet.readthedocs.org/en/latest/R-package/mnistCompetition.html).
+hosted version of tutorial [here](http://mxnet.readthedocs.io/en/latest/packages/r/mnistCompetition.html).
## Data Loading
diff --git a/docs/tutorials/imagenet_full.md b/docs/tutorials/imagenet_full.md
index ba6b26648cdf..f0e722ed975d 100644
--- a/docs/tutorials/imagenet_full.md
+++ b/docs/tutorials/imagenet_full.md
@@ -68,7 +68,7 @@ We should note that this result is by no means optimal, as we did not carefully
## The Code and Model
The code and step guide is publically available at [https://github.com/dmlc/mxnet/tree/master/example/image-classification](https://github.com/dmlc/mxnet/tree/master/example/image-classification)
-We also release a pretrained model under [https://github.com/dmlc/mxnet-model-gallery/tree/master/imagenet-21k-inception](https://github.com/dmlc/mxnet-model-gallery/tree/master/imagenet-21k-inception)
+We also release a pretrained model under [https://github.com/dmlc/mxnet-model-gallery/tree/master/imagenet-21k-inception](https://github.com/dmlc/mxnet-model-gallery/tree/master/imagenet-21k-inception.md)
## How to Use The Model
We should point out it 21k classes is much more challenging than 1k. Directly use the raw prediction is not a reasonable way.
diff --git a/docs/zh/packages/python/index.md b/docs/zh/packages/python/index.md
new file mode 100644
index 000000000000..0a539eb6bb36
--- /dev/null
+++ b/docs/zh/packages/python/index.md
@@ -0,0 +1,26 @@
+MXNet Python Package
+====================
+
+这个页面包含 python 程序包中所有相关的文档.
+为了安装 python 程序包, 请 checkout [Build and Installation Instruction](../../how_to/build.md).
+
+这里有关于 mxnet 的三种文档.
+
+* [Tutorials](#tutorials) 介绍一个特定的关于 mxnet 的用例.
+* [Code Examples](../../../example) 示例代码.
+* [Python API Documents](#python-api-documents) 关于指定模块的文档, 同时也包含所有 API 的参考文档.
+
+Tutorials
+---------
+* [Python Overview Tutorial](tutorial.md)
+* [Symbolic Configuration and Execution in Pictures](symbol_in_pictures.md)
+* [How to Create New Operations (Layers)](../../how_to/new_op.md)
+
+Python API Documents
+--------------------
+* [High Level Model Training Related API](model.md)
+* [The Module API](module.md)
+* [NDArray API](ndarray.md)
+* [Symbolic API](symbol.md)
+* [KVStore API](kvstore.md)
+* [Data Loading API](io.md)
diff --git a/docs/zh/packages/python/io.md b/docs/zh/packages/python/io.md
new file mode 100644
index 000000000000..08165d84ed0d
--- /dev/null
+++ b/docs/zh/packages/python/io.md
@@ -0,0 +1,185 @@
+MXNet Python Data Loading API
+=============================
+* [Introduction](#introduction) 介绍 MXNet 数据加载模块的主要特性.
+* [Parameters For Data Iterator](#parameters-for-data-iterator) 阐述清楚 dataIter 的参数的不同用法.
+* [Create A Data Iterator](#create-a-data-iterator) 介绍如何在创建一个 python 版本的 MXNet 的 Data Iterator.
+* [How To Get Data](#how-to-get-data) 介绍数据源以及数据预处理工具.
+* [IO API Reference](#io-api-reference) IO API 参考文档以及它们的解释.
+
+Introduction
+------------
+这页面介绍 MXNet 的数据输入方式. MXNet 使用迭代器 (iterator)的方式向神经网络输入数据. 迭代器做了一些数据预处理, 同时以 batch 的形式向神经网络提供数据.
+
+
+* 我们为 MNIST 图像和 RecordIO 图像提供了基本的迭代器.
+* 为了掩盖 IO 开销, 我们提供了预处理策略, 它可以让机器学习的过程和取数据的过程并行来做. 我们使用一个单独的线程来做取数据的工作.
+
+Parameters For Data Iterator
+----------------------------
+
+一般地讲, 如果你要创建一个数据迭代器, 你需要实现下面讲到的五种参数:
+
+* **Dataset Param** 提供数据集的基本信息, 比如说, 文件路径, 输入的数据的 shape.
+* **Batch Param** 提供构建一个 batch 的信息, 比如说 batch size.
+* **Augmentation Param** 指定输入数据的扩充方式 (e.g. crop, mirror).
+* **Backend Param** 控制后端线程掩盖数据加载开销的行为.
+* **Auxiliary Param** 提供的可选项, 用来帮助检查和 debug.
+
+通常地讲, **Dataset Param** 和 **Batch Param** *必须* 提供, 否则 data batch 无法创建. 其他的参数根据算法和性能的需要来设置. 文档的后半部分会提供解释详尽的例子.
+
+Create A Data Iterator
+----------------------
+这个 IO API 提供在 python 中创建数据迭代器的简单方式. 下面的代码是如何创建一个 Cifar 的数据迭代器的代码.
+
+
+```python
+>>>dataiter = mx.io.ImageRecordIter(
+>>> # Utility Parameter
+>>> # 可选
+>>> # Name of the data, should match the name of the data input of the network
+>>> # data_name='data',
+>>> # Utility Parameter
+>>> # 可选
+>>> # Name of the label, should match the name of the label parameter of the network.
+>>> # Usually, if the loss layer is named 'foo', then the label input has the name
+>>> # 'foo_label', unless overwritten
+>>> # label_name='softmax_label',
+>>> # Dataset Parameter
+>>> # Required
+>>> # indicating the data file, please check the data is already there
+>>> path_imgrec="data/cifar/train.rec",
+>>> # Dataset Parameter
+>>> # Required
+>>> # indicating the image size after preprocessing
+>>> data_shape=(3,28,28),
+>>> # Batch Parameter
+>>> # Required
+>>> # tells how many images in a batch
+>>> batch_size=100,
+>>> # Augmentation Parameter
+>>> # 可选
+>>> # when offers mean_img, each image will subtract the mean value at each pixel
+>>> mean_img="data/cifar/cifar10_mean.bin",
+>>> # Augmentation Parameter
+>>> # 可选
+>>> # randomly crop a patch of the data_shape from the original image
+>>> rand_crop=True,
+>>> # Augmentation Parameter
+>>> # Optional
+>>> # randomly mirror the image horizontally
+>>> rand_mirror=True,
+>>> # Augmentation Parameter
+>>> # Optional
+>>> # randomly shuffle the data
+>>> shuffle=False,
+>>> # Backend Parameter
+>>> # Optional
+>>> # Preprocessing thread number
+>>> preprocess_threads=4,
+>>> # Backend Parameter
+>>> # Optional
+>>> # Prefetch buffer size
+>>> prefetch_buffer=1)
+```
+
+从上面的代码中, 我们可以学到如何创建一个数据迭代器. 首先, 你需要明确的指出需要取哪种类型的数据(MNIST, ImageRecord 等等). 然后, 提供描述数据的可选参数, 比如 batching, 数据扩充方式, 多线程处理, 预取数据. MXNet 框架会检查参数的有效性, 如果一个必须的参数没有提供, 框架会报错.
+
+
+How To Get Data
+---------------
+
+
+我们提供了 [脚本](../../tests/python/common/get_data.py) 来下载 MNIST 数据和 Cifar10 ImageRecord 数据. 如果你要创建你自己的数据集, 我们建议您用 RecordIO 作为数据格式.
+
+## Create Dataset Using RecordIO
+
+RecordIO 实现了顺序存储 record 的数据格式. 我们建议图像数据按照 record 的格式来存储和打包到一起. 这样做有以下几点好处:
+
+
+* 将图像储存为压缩过的格式, 比如 JPEG, 因为 record 可以大小不同. 压缩过的格式可以极大的减小储存在硬盘上的数据集大小.
+* 将若干 record 打包存储, 可以实现硬盘的连续读取, 避免随机读取硬盘.
+* RecordIO 容易分块, 这样分布式处理的设置会更加简单. 后面会有例子具体来说明.
+
+我们提供了 [im2rec tool](../../tools/im2rec.cc) 来让用户自己来生成 RecordIO 格式的数据集. 下面是具体流程:
+
+### 0.Before you start
+确定你已经下载了需要的数据集. 你不需要自己来做图像的 resize 操作, 现在 `im2rec` 这个工具可以自动来做这种操作. 你可以查看 `im2rec` 提供的的信息来获取更多的内容.
+
+### 1.Make the image list
+当你得到了信息之后, 你首先需要生成一个 image list 的文件. 格式如下
+```
+integer_image_index \t label_index \t path_to_image
+```
+通常, 这个程序会读取一个包含所有图像文件名的列表文件, shuffle 这些文件, 然后将 shuffle 后的图像文件名列表分为训练列表文件和测试列表文件. 按照下面给出的例子的格式存储.
+
+简单的例子文件
+
+```bash
+895099 464 n04467665_17283.JPEG
+10025081 412 ILSVRC2010_val_00025082.JPEG
+74181 789 n01915811_2739.JPEG
+10035553 859 ILSVRC2010_val_00035554.JPEG
+10048727 929 ILSVRC2010_val_00048728.JPEG
+94028 924 n01980166_4956.JPEG
+1080682 650 n11807979_571.JPEG
+972457 633 n07723039_1627.JPEG
+7534 11 n01630670_4486.JPEG
+1191261 249 n12407079_5106.JPEG
+```
+
+### 2.Make the binary file
+
+需要用 *im2rec* 这个程序来生成二进制文件. im2rec 需要你刚刚生成的 _image list file_ 的路径, 图像的 _root_ 路径 和 _output file_ 路径作为参数. 这个过程需要花费几个小时, 所以需要耐心. :)
+
+
+简单的例子:
+```bash
+./bin/im2rec image.lst image_root_dir output.bin resize=256
+```
+要想获得更多的用法, 直接运行 ```./bin/im2rec```命令, 会在终端打印出详细的用法.
+
+### Extension: Multiple Labels for a Single Image
+
+`im2rec` 工具以及 `mx.io.ImageRecordIter` 支持对单个图像打多个标签. 假设你需要为单个图像打四个标签, 你可以按照下面的步骤来使用 RecordIO 相关的工具.
+
+1. 按照下面的格式生成 image list 文件:
+```
+integer_image_index \t label_1 \t label_2 \t label_3 \t label_4 \t path_to_image
+```
+
+2. 使用 `im2rec` 时, 需要增加一个 'label_width=4' 作为命令行参数, 比如.
+```bash
+./bin/im2rec image.lst image_root_dir output.bin resize=256 label_width=4
+```
+
+3. 在你的迭代器初始化的时候, 设置 `label_width=4` 和 `path_imglist=<>` 作为参数.
+
+```python
+dataiter = mx.io.ImageRecordIter(
+ path_imgrec="data/cifar/train.rec",
+ data_shape=(3,28,28),
+ path_imglist="data/cifar/image.lst",
+ label_width=4
+)
+```
+
+这样你就完成了一个多标签的数据迭代器.
+
+```eval_rst
+.. raw:: html
+
+
+```
+
+
+IO API Reference
+----------------
+
+```eval_rst
+.. automodule:: mxnet.io
+ :members:
+
+.. raw:: html
+
+
+```
diff --git a/docs/zh/packages/python/kvstore.md b/docs/zh/packages/python/kvstore.md
new file mode 100644
index 000000000000..5d694036d548
--- /dev/null
+++ b/docs/zh/packages/python/kvstore.md
@@ -0,0 +1,133 @@
+KVStore API
+===========
+
+* [基本的 Push 和 Pull 操作](#basic-push-and-pull)
+* [key-value pairs 列表的接口](#interface-for-list-key-value-pairs)
+* [多机]() TODO
+
+## Basic Push and Pull
+
+单机多卡的基本操作.
+
+### Initialization
+
+首先让我们来考虑一个简单的例子. 首先初始化一个 (`int`, `NDArray`) push 到 KVStore 里, 然后再将数据 pull 下来.
+
+```python
+>>> kv = mx.kv.create('local') # create a local kv store.
+>>> shape = (2,3)
+>>> kv.init(3, mx.nd.ones(shape)*2)
+>>> a = mx.nd.zeros(shape)
+>>> kv.pull(3, out = a)
+>>> print a.asnumpy()
+[[ 2. 2. 2.]
+ [ 2. 2. 2.]]
+```
+
+### Push, Aggregation, and Updater
+
+对于任意一个被初始化的 key-value 数据, 我们可以向这个 `key` push 一个相同 shape 的数据覆盖掉原来的 value.
+
+
+```python
+>>> kv.push(3, mx.nd.ones(shape)*8)
+>>> kv.pull(3, out = a) # pull out the value
+>>> print a.asnumpy()
+[[ 8. 8. 8.]
+ [ 8. 8. 8.]]
+```
+
+需要做 push 操作的数据可以存储在任意的设备上. 而且, 我们可以向同一个 key 推送多份数据, KVStore 客户端会首先将这些数据做 sum 操作, 然后将聚合后的结果 push 到服务器端, 减少了数据通信.
+
+```python
+>>> gpus = [mx.gpu(i) for i in range(4)]
+>>> b = [mx.nd.ones(shape, gpu) for gpu in gpus]
+>>> kv.push(3, b)
+>>> kv.pull(3, out = a)
+>>> print a.asnumpy()
+[[ 4. 4. 4.]
+ [ 4. 4. 4.]]
+```
+
+对于每一个 push 操作, KVStore 将推送上来的数据通过 `updater` 定义的方式来进行更新操作. 默认的 `updater` 是 `ASSIGN`, 我们可以根据需要来替换掉这个默认的 `updater`.
+
+```python
+>>> def update(key, input, stored):
+>>> print "update on key: %d" % key
+>>> stored += input * 2
+>>> kv._set_updater(update)
+>>> kv.pull(3, out=a)
+>>> print a.asnumpy()
+[[ 4. 4. 4.]
+ [ 4. 4. 4.]]
+>>> kv.push(3, mx.nd.ones(shape))
+update on key: 3
+>>> kv.pull(3, out=a)
+>>> print a.asnumpy()
+[[ 6. 6. 6.]
+ [ 6. 6. 6.]]
+```
+
+### Pull
+
+我们已经看到如何 pull 单个的 key-value 对. 类似于 push, 我们也能只用一个调用来将数据 pull 到多个设备中.
+
+```python
+>>> b = [mx.nd.ones(shape, gpu) for gpu in gpus]
+>>> kv.pull(3, out = b)
+>>> print b[1].asnumpy()
+[[ 6. 6. 6.]
+ [ 6. 6. 6.]]
+```
+
+## Interface for list key-value pairs
+
+我们到现在为止所介绍的所有操作都是关于一个 key. KVStore 也提供了对 key-value pair 列表的接口.
+
+针对单个的设备:
+
+```python
+>>> keys = [5, 7, 9]
+>>> kv.init(keys, [mx.nd.ones(shape)]*len(keys))
+>>> kv.push(keys, [mx.nd.ones(shape)]*len(keys))
+update on key: 5
+update on key: 7
+update on key: 9
+>>> b = [mx.nd.zeros(shape)]*len(keys)
+>>> kv.pull(keys, out = b)
+>>> print b[1].asnumpy()
+[[ 3. 3. 3.]
+ [ 3. 3. 3.]]
+```
+
+针对多个设备:
+
+```python
+>>> b = [[mx.nd.ones(shape, gpu) for gpu in gpus]] * len(keys)
+>>> kv.push(keys, b)
+update on key: 5
+update on key: 7
+update on key: 9
+>>> kv.pull(keys, out = b)
+>>> print b[1][1].asnumpy()
+[[ 11. 11. 11.]
+ [ 11. 11. 11.]]
+```
+
+```eval_rst
+.. raw:: html
+
+
+```
+
+
+## API Reference
+
+```eval_rst
+.. automodule:: mxnet.kvstore
+ :members:
+
+.. raw:: html
+
+
+```
diff --git a/docs/zh/packages/python/ndarray.md b/docs/zh/packages/python/ndarray.md
new file mode 100644
index 000000000000..3b2fad0a199c
--- /dev/null
+++ b/docs/zh/packages/python/ndarray.md
@@ -0,0 +1,161 @@
+NDArray API
+===========
+
+NDArray 程序包 (`mxnet.ndarray`) 包含类似于 `numpy.ndarray` 的张量计算包. 它的语法很相近, 除了增加了一些处理 I/O 和多设备的调用.
+
+Create NDArray
+--------------
+
+类似 `numpy`, 你可以按照下面的方式来创建 `mxnet.ndarray` :
+```python
+>>> import mxnet as mx
+>>> # all-zero array of dimension 100x50
+>>> a = mx.nd.zeros((100, 50))
+>>> # all-one array of dimension 256x32x128x1
+>>> b = mx.nd.ones((256, 32, 128, 1))
+>>> # initialize array with contents
+>>> c = mx.nd.array([[1, 2, 3], [4, 5, 6]])
+```
+
+NDArray operations
+-------------------
+
+我们提供了几个基本的 ndarray 操作, 比如说算术和切片. 更多的操作正在开发中!
+
+### 算术操作
+```python
+>>> import mxnet as mx
+>>> a = mx.nd.zeros((100, 50))
+>>> a.shape
+(100L, 50L)
+>>> b = mx.nd.ones((100, 50))
+>>> # c and d will be calculated in parallel here!
+>>> c = a + b
+>>> d = a - b
+>>> # inplace operation, b's contents will be modified, but c and d won't be affected.
+>>> b += d
+```
+
+### 切片操作
+```python
+>>> import mxnet as mx
+>>> a = mx.nd.zeros((100, 50))
+>>> a[0:10] = 1 # first 10 rows will become 1
+```
+
+Conversion from/to `numpy.ndarray`
+----------------------------------
+
+MXNet NDArray 提供了很自然的方式来支持`mxnet.ndarray` 和 `numpy.ndarray` 之间的互相转换:
+
+```python
+>>> import mxnet as mx
+>>> import numpy as np
+>>> a = np.array([1,2,3])
+>>> b = mx.nd.array(a) # convert from numpy array
+>>> b
+
+>>> b.asnumpy() # convert to numpy array
+array([ 1., 2., 3.], dtype=float32)
+```
+
+Save Load NDArray
+-----------------
+
+你可以使用 pickle 来保存和加载 NDArray.
+我们也提供了一些函数来简化 NDArray 的列表或者字典的加载与保存操作.
+
+```python
+>>> import mxnet as mx
+>>> a = mx.nd.zeros((100, 200))
+>>> b = mx.nd.zeros((100, 200))
+>>> # save list of NDArrays
+>>> mx.nd.save("/path/to/array/file", [a, b])
+>>> # save dictionary of NDArrays to AWS S3
+>>> mx.nd.save("s3://path/to/s3/array", {'A' : a, 'B' : b})
+>>> # save list of NDArrays to hdfs.
+>>> mx.nd.save("hdfs://path/to/hdfs/array", [a, b])
+>>> from_file = mx.nd.load("/path/to/array/file")
+>>> from_s3 = mx.nd.load("s3://path/to/s3/array")
+>>> from_hdfs = mx.nd.load("hdfs://path/to/hdfs/array")
+```
+
+使用 `save` 和 `load` 的好的一方面是:
+- 你可以在 `mxnet` 的所有其他编程语言的绑定中使用相同的接口.
+- 已经支持 S3 和 HDFS
+
+Multi-device Support
+--------------------
+设备信息是存储在 `mxnet.Context` 数据结构中. 当我们在 mxnet 中创建 ndarray 的时候, 我们要么使用上下文参数(默认是 CPU 上下文) 在指定的设备上创建, 或者按照下面的例子中的方式使用 `with` 表达式:
+
+```python
+>>> import mxnet as mx
+>>> cpu_a = mx.nd.zeros((100, 200))
+>>> cpu_a.context
+cpu(0)
+>>> with mx.Context(mx.gpu(0)):
+>>> gpu_a = mx.nd.ones((100, 200))
+>>> gpu_a.context
+gpu(0)
+>>> ctx = mx.Context(mx.gpu(0))
+>>> gpu_b = mx.nd.zeros((100, 200), ctx)
+>>> gpu_b.context
+gpu(0)
+```
+
+现在我们还 *不支持* 涉及不同上下文环境中的多个 ndarray 的操作. 为了支持这种情况下的操作, 我们首先使用 `copyto` 方法将不同的上下文环境中的 ndarray 拷贝到同一个上下文环境中, 然后执行相应的操作:
+
+```python
+>>> import mxnet as mx
+>>> x = mx.nd.zeros((100, 200))
+>>> with mx.Context(mx.gpu(0)):
+>>> y = mx.nd.zeros((100, 200))
+>>> z = x + y
+mxnet.base.MXNetError: [13:29:12] src/ndarray/ndarray.cc:33: Check failed: lhs.ctx() == rhs.ctx() operands context mismatch
+>>> cpu_y = mx.nd.zeros((100, 200))
+>>> y.copyto(cpu_y)
+>>> z = x + cpu_y
+```
+
+```eval_rst
+.. raw:: html
+
+
+```
+
+NDArray API Reference
+---------------------
+
+```eval_rst
+.. automodule:: mxnet.ndarray
+ :members:
+
+.. raw:: html
+
+
+```
+
+NDArray Random API Reference
+----------------------------
+
+```eval_rst
+.. automodule:: mxnet.random
+ :members:
+
+.. raw:: html
+
+
+```
+
+
+Context API Reference
+---------------------
+
+```eval_rst
+.. automodule:: mxnet.context
+ :members:
+
+.. raw:: html
+
+
+```
diff --git a/docs/zh/system/engine.md b/docs/zh/system/dep_engine.md
similarity index 100%
rename from docs/zh/system/engine.md
rename to docs/zh/system/dep_engine.md
diff --git a/docs/zh/system/index.md b/docs/zh/system/index.md
index 2798b9531daa..31811ca34547 100644
--- a/docs/zh/system/index.md
+++ b/docs/zh/system/index.md
@@ -21,7 +21,7 @@
上面显示的是 mxnet 的主要的模块以及它们之间如何进行交互. 这些模块是
-- [运行时依赖引擎](engine.md): 根据操作的读写依赖关系来调度和执行这些操作.
+- [运行时依赖引擎](dep_engine.md): 根据操作的读写依赖关系来调度和执行这些操作.
- Storage Allocator: 可以高效的申请内存和重复利用内存, 包括 CPU 的主存和 GPU 的显存.
- Resource Manager: 管理全局资源, 包括 随机数产生器以及临时空间.
- NDArray: 动态的,异步的n维数组, 为MXNet 提供命令式编程模型.
diff --git a/docs/zh/system/note_data_loading.md b/docs/zh/system/note_data_loading.md
index a48040df98d3..738a6e23eb44 100644
--- a/docs/zh/system/note_data_loading.md
+++ b/docs/zh/system/note_data_loading.md
@@ -113,7 +113,7 @@ InputSplit 需要下面的几个参数:
### Hide IO Cost Using Threadediter
-掩藏 IO 开销的一种方式是主线程在做 feed-forward 和 backward 的时候, 使用一个独立的现成做数据预取操作. 为了支持更加复杂的训练方案, MXNet 提供了基于 dmlc-core 的 threadediter 更加通用的 IO 处理流水线.
+掩藏 IO 开销的一种方式是主线程在做 feed-forward 和 backward 的时候, 使用一个独立的线程做数据预取操作. 为了支持更加复杂的训练方案, MXNet 提供了基于 dmlc-core 的 threadediter 更加通用的 IO 处理流水线.
Threadediter 的重点是使用一个独立的线程作为数据提供者, 主线程作为数据消费者, 图示如下.
diff --git a/example/cnn_text_classification/text_cnn.py b/example/cnn_text_classification/text_cnn.py
index c944ec5c9270..4ce48a94ac77 100644
--- a/example/cnn_text_classification/text_cnn.py
+++ b/example/cnn_text_classification/text_cnn.py
@@ -13,20 +13,28 @@
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) # get a logger to accuracies are printed
+logs = sys.stderr
+
CNNModel = namedtuple("CNNModel", ['cnn_exec', 'symbol', 'data', 'label', 'param_blocks'])
-def make_text_cnn(sentence_size, num_embed, batch_size, num_label=2, filter_list=[3, 4, 5], num_filter=100, dropout=0.):
+def make_text_cnn(sentence_size, num_embed, batch_size, vocab_size,
+ num_label=2, filter_list=[3, 4, 5], num_filter=100,
+ dropout=0., with_embedding=True):
+
input_x = mx.sym.Variable('data') # placeholder for input
input_y = mx.sym.Variable('softmax_label') # placeholder for output
# embedding layer
- # embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size, output_dim=num_embed, name='vocab_embed')
- # embed_layer = mx.sym.Reshape(data=embed_layer, target_shape=(1, 1, sentence_size, num_embed))
+ if not with_embedding:
+ embed_layer = mx.sym.Embedding(data=input_x, input_dim=vocab_size, output_dim=num_embed, name='vocab_embed')
+ conv_input = mx.sym.Reshape(data=embed_layer, target_shape=(batch_size, 1, sentence_size, num_embed))
+ else:
+ conv_input = input_x
# create convolution + (max) pooling layer for each filter operation
pooled_outputs = []
for i, filter_size in enumerate(filter_list):
- convi = mx.sym.Convolution(data=input_x, kernel=(filter_size, num_embed), num_filter=num_filter)
+ convi = mx.sym.Convolution(data=conv_input, kernel=(filter_size, num_embed), num_filter=num_filter)
relui = mx.sym.Activation(data=convi, act_type='relu')
pooli = mx.sym.Pooling(data=relui, pool_type='max', kernel=(sentence_size - filter_size + 1, 1), stride=(1,1))
pooled_outputs.append(pooli)
@@ -54,12 +62,18 @@ def make_text_cnn(sentence_size, num_embed, batch_size, num_label=2, filter_list
return sm
-def setup_cnn_model(ctx, batch_size, sentence_size, num_embed, dropout=0.5, initializer=mx.initializer.Uniform(0.1)):
- cnn = make_text_cnn(sentence_size, num_embed, batch_size=batch_size, dropout=dropout)
+def setup_cnn_model(ctx, batch_size, sentence_size, num_embed, vocab_size,
+ dropout=0.5, initializer=mx.initializer.Uniform(0.1), with_embedding=True):
+
+ cnn = make_text_cnn(sentence_size, num_embed, batch_size=batch_size,
+ vocab_size=vocab_size, dropout=dropout, with_embedding=with_embedding)
arg_names = cnn.list_arguments()
input_shapes = {}
- input_shapes['data'] = (batch_size, 1, sentence_size, num_embed)
+ if with_embedding:
+ input_shapes['data'] = (batch_size, 1, sentence_size, num_embed)
+ else:
+ input_shapes['data'] = (batch_size, sentence_size)
arg_shape, out_shape, aux_shape = cnn.infer_shape(**input_shapes)
arg_arrays = [mx.nd.zeros(s, ctx) for s in arg_shape]
@@ -88,7 +102,8 @@ def setup_cnn_model(ctx, batch_size, sentence_size, num_embed, dropout=0.5, init
return CNNModel(cnn_exec=cnn_exec, symbol=cnn, data=data, label=label, param_blocks=param_blocks)
-def train_cnn(model, X_train_batch, y_train_batch, X_dev_batch, y_dev_batch, batch_size, optimizer='rmsprop', max_grad_norm=5.0, learning_rate=0.001, epoch=200):
+def train_cnn(model, X_train_batch, y_train_batch, X_dev_batch, y_dev_batch, batch_size,
+ optimizer='rmsprop', max_grad_norm=5.0, learning_rate=0.0005, epoch=200):
m = model
# create optimizer
opt = mx.optimizer.create(optimizer)
@@ -139,13 +154,25 @@ def train_cnn(model, X_train_batch, y_train_batch, X_dev_batch, y_dev_batch, bat
# decay learning rate
if iteration % 50 == 0 and iteration > 0:
opt.lr *= 0.5
- print >> sys.stderr, 'reset learning rate to %g' % opt.lr
+ print >> logs, 'reset learning rate to %g' % opt.lr
# end of training loop
toc = time.time()
- print >> sys.stderr, 'Iter [%d] Train: Time: %.3f, Training Accuracy: %.3f' % (iteration, toc - tic, num_correct * 100 / float(num_total))
+ train_time = toc - tic
+ train_acc = num_correct * 100 / float(num_total)
+
+ # saving checkpoint
+ if (iteration + 1) % 10 == 0:
+ prefix = 'cnn'
+ m.symbol.save('checkpoint/%s-symbol.json' % prefix)
+ save_dict = {('arg:%s' % k) :v for k, v in m.cnn_exec.arg_dict.items()}
+ save_dict.update({('aux:%s' % k) : v for k, v in m.cnn_exec.aux_dict.items()})
+ param_name = 'checkpoint/%s-%04d.params' % (prefix, iteration)
+ mx.nd.save(param_name, save_dict)
+ print >> logs, 'Saved checkpoint to %s' % param_name
+
- # eval on dev set
+ # evaluate on dev set
num_correct = 0
num_total = 0
for begin in range(0, X_dev_batch.shape[0], batch_size):
@@ -161,7 +188,9 @@ def train_cnn(model, X_train_batch, y_train_batch, X_dev_batch, y_dev_batch, bat
num_correct += sum(batchY == np.argmax(m.cnn_exec.outputs[0].asnumpy(), axis=1))
num_total += len(batchY)
- print >> sys.stderr, 'Dev Accuracy thus far: %.3f' % ( num_correct * 100 / float(num_total) )
+ dev_acc = num_correct * 100 / float(num_total)
+ print >> logs, 'Iter [%d] Train: Time: %.3fs, Training Accuracy: %.3f \
+ --- Dev Accuracy thus far: %.3f' % (iteration, train_time, train_acc, dev_acc)
def main():
@@ -170,7 +199,6 @@ def main():
word2vec = data_helpers.load_pretrained_word2vec('data/rt.vec')
x, y = data_helpers.load_data_with_word2vec(word2vec)
-
# randomly shuffle data
np.random.seed(10)
shuffle_indices = np.random.permutation(np.arange(len(y)))
@@ -194,9 +222,38 @@ def main():
print 'embedding size', num_embed
batch_size = 50
- cnn_model = setup_cnn_model(mx.gpu(0), batch_size, sentence_size, num_embed, dropout=0.5)
+ cnn_model = setup_cnn_model(mx.gpu(1), batch_size, sentence_size, num_embed, dropout=0.5)
+ train_cnn(cnn_model, x_train, y_train, x_dev, y_dev, batch_size)
+
+def train_without_pretrained_embedding():
+ x, y, vocab, vocab_inv = data_helpers.load_data()
+ vocab_size = len(vocab)
+
+ # randomly shuffle data
+ np.random.seed(10)
+ shuffle_indices = np.random.permutation(np.arange(len(y)))
+ x_shuffled = x[shuffle_indices]
+ y_shuffled = y[shuffle_indices]
+
+ # split train/dev set
+ x_train, x_dev = x_shuffled[:-1000], x_shuffled[-1000:]
+ y_train, y_dev = y_shuffled[:-1000], y_shuffled[-1000:]
+ print 'Train/Dev split: %d/%d' % (len(y_train), len(y_dev))
+ print 'train shape:', x_train.shape
+ print 'dev shape:', x_dev.shape
+ print 'vocab_size', vocab_size
+
+ batch_size = 50
+ num_embed = 300
+ sentence_size = x_train.shape[1]
+
+ print 'batch size', batch_size
+ print 'sentence max words', sentence_size
+ print 'embedding size', num_embed
+
+ cnn_model = setup_cnn_model(mx.gpu(0), batch_size, sentence_size, num_embed, vocab_size, dropout=0.5, with_embedding=False)
train_cnn(cnn_model, x_train, y_train, x_dev, y_dev, batch_size)
if __name__ == '__main__':
- main()
+ train_without_pretrained_embedding()
diff --git a/example/image-classification/symbol_inception-bn-28-small.py b/example/image-classification/symbol_inception-bn-28-small.py
index bc934c377b5a..b5a2afce2c1c 100644
--- a/example/image-classification/symbol_inception-bn-28-small.py
+++ b/example/image-classification/symbol_inception-bn-28-small.py
@@ -17,7 +17,7 @@ def DownsampleFactory(data, ch_3x3, mirror_attr):
# conv 3x3
conv = ConvFactory(data=data, kernel=(3, 3), stride=(2, 2), num_filter=ch_3x3, pad=(1, 1), mirror_attr=mirror_attr)
# pool
- pool = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='max', attr=mirror_attr)
+ pool = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max', attr=mirror_attr)
# concat
concat = mx.symbol.Concat(*[conv, pool])
return concat
diff --git a/example/image-classification/symbol_inception-bn-full.py b/example/image-classification/symbol_inception-bn-full.py
index 27f6bebd9815..de87cf8ebe42 100644
--- a/example/image-classification/symbol_inception-bn-full.py
+++ b/example/image-classification/symbol_inception-bn-full.py
@@ -37,7 +37,7 @@ def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name):
cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name))
cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name))
# pool + proj
- pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type="max", name=('max_pool_%s_pool' % name))
+ pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name))
# concat
concat = mx.symbol.Concat(*[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name)
return concat
diff --git a/example/image-classification/symbol_inception-bn.py b/example/image-classification/symbol_inception-bn.py
index 985ede4a4a19..c3a2fa8d08ae 100644
--- a/example/image-classification/symbol_inception-bn.py
+++ b/example/image-classification/symbol_inception-bn.py
@@ -45,7 +45,7 @@ def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name):
cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name))
cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name))
# pool + proj
- pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type="max", name=('max_pool_%s_pool' % name))
+ pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name))
# concat
concat = mx.symbol.Concat(*[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name)
return concat
diff --git a/example/image-classification/train_cifar10.py b/example/image-classification/train_cifar10.py
index aa5e2e1b571c..dc3580cd3181 100644
--- a/example/image-classification/train_cifar10.py
+++ b/example/image-classification/train_cifar10.py
@@ -9,7 +9,7 @@
help = 'the cnn to use')
parser.add_argument('--data-dir', type=str, default='cifar10/',
help='the input data directory')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--num-examples', type=int, default=60000,
help='the number of training examples')
@@ -40,7 +40,7 @@ def _download(data_dir):
os.chdir(data_dir)
if (not os.path.exists('train.rec')) or \
(not os.path.exists('test.rec')) :
- os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip")
+ os.system("wget http://data.dmlc.ml/mxnet/data/cifar10.zip")
os.system("unzip -u cifar10.zip")
os.system("mv cifar/* .; rm -rf cifar; rm cifar10.zip")
os.chdir("..")
diff --git a/example/image-classification/train_cifar10_mirroring.py b/example/image-classification/train_cifar10_mirroring.py
index 81124a2f1776..24ded036bd71 100644
--- a/example/image-classification/train_cifar10_mirroring.py
+++ b/example/image-classification/train_cifar10_mirroring.py
@@ -8,7 +8,7 @@
# documentation could be expected when this feature is mature.
#
# When mirroring is turned on and set properly, we could expect smaller memory
-# consumption with slightly slower computation speed (due to extra forward
+# consumption with slightly slower computation speed (due to extra forward
# steps). We are not including a sample running log here, as this test case
# is only a functionality test. The using of pycuda GPU memory query is also
# not very good way of measuring the memory usage here.
@@ -24,7 +24,7 @@
help = 'the cnn to use')
parser.add_argument('--data-dir', type=str, default='cifar10/',
help='the input data directory')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--num-examples', type=int, default=60000,
help='the number of training examples')
@@ -55,7 +55,7 @@ def _download(data_dir):
os.chdir(data_dir)
if (not os.path.exists('train.rec')) or \
(not os.path.exists('test.rec')) :
- os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip")
+        os.system("wget http://data.dmlc.ml/mxnet/data/cifar10.zip")
os.system("unzip -u cifar10.zip")
os.system("mv cifar/* .; rm -rf cifar; rm cifar10.zip")
os.chdir("..")
diff --git a/example/image-classification/train_cifar10_resnet.py b/example/image-classification/train_cifar10_resnet.py
index a90acc4b0aed..b85ffd35c434 100644
--- a/example/image-classification/train_cifar10_resnet.py
+++ b/example/image-classification/train_cifar10_resnet.py
@@ -49,7 +49,7 @@
parser = argparse.ArgumentParser(description='train an image classifer on cifar10')
parser.add_argument('--data-dir', type=str, default='cifar10/',
help='the input data directory')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--num-examples', type=int, default=50000,
help='the number of training examples')
@@ -79,7 +79,7 @@ def _download(data_dir):
os.chdir(data_dir)
if (not os.path.exists('train.rec')) or \
(not os.path.exists('test.rec')):
- os.system('wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip')
+ os.system('wget http://data.dmlc.ml/mxnet/data/cifar10.zip')
os.system('unzip -u cifar10.zip')
os.system('mv cifar/* .; rm -rf cifar; rm cifar10.zip')
os.chdir('..')
diff --git a/example/image-classification/train_imagenet.py b/example/image-classification/train_imagenet.py
index e53c607bc1a7..394231bd3acc 100644
--- a/example/image-classification/train_imagenet.py
+++ b/example/image-classification/train_imagenet.py
@@ -30,7 +30,7 @@
help="load the model on an epoch using the model-prefix")
parser.add_argument('--batch-size', type=int, default=32,
help='the batch size')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--kv-store', type=str, default='local',
help='the kvstore type')
diff --git a/example/image-classification/train_mnist.R b/example/image-classification/train_mnist.R
index e4fde087b174..4d80512a8e92 100644
--- a/example/image-classification/train_mnist.R
+++ b/example/image-classification/train_mnist.R
@@ -4,11 +4,11 @@ require(mxnet)
download_ <- function(data_dir) {
dir.create(data_dir, showWarnings = FALSE)
setwd(data_dir)
- if ((!file.exists('train-images-idx3-ubyte')) ||
+ if ((!file.exists('train-images-idx3-ubyte')) ||
(!file.exists('train-labels-idx1-ubyte')) ||
(!file.exists('t10k-images-idx3-ubyte')) ||
(!file.exists('t10k-labels-idx1-ubyte'))) {
- download.file(url='http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip',
+ download.file(url='http://data.dmlc.ml/mxnet/data/mnist.zip',
destfile='mnist.zip', method='wget')
unzip("mnist.zip")
file.remove("mnist.zip")
@@ -83,7 +83,7 @@ get_iterator <- function(data_shape) {
}
parse_args <- function() {
- parser <- ArgumentParser(description='train an image classifer on mnist')
+ parser <- ArgumentParser(description='train an image classifer on mnist')
parser$add_argument('--network', type='character', default='mlp',
choices = c('mlp', 'lenet'),
help = 'the cnn to use')
diff --git a/example/image-classification/train_mnist.py b/example/image-classification/train_mnist.py
index 5b6fa3c05b60..fd7c00cfac25 100644
--- a/example/image-classification/train_mnist.py
+++ b/example/image-classification/train_mnist.py
@@ -12,10 +12,25 @@ def _download(data_dir):
(not os.path.exists('train-labels-idx1-ubyte')) or \
(not os.path.exists('t10k-images-idx3-ubyte')) or \
(not os.path.exists('t10k-labels-idx1-ubyte')):
- os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip")
+ os.system("wget http://data.dmlc.ml/mxnet/data/mnist.zip")
os.system("unzip -u mnist.zip; rm mnist.zip")
os.chdir("..")
+def get_loc(data, attr={'lr_mult':'0.01'}):
+ """
+    the localisation network in lenet-stn; it improves accuracy by more than 1%
+    when num-epoch >= 15
+ """
+ loc = mx.symbol.Convolution(data=data, num_filter=30, kernel=(5, 5), stride=(2,2))
+ loc = mx.symbol.Activation(data = loc, act_type='relu')
+ loc = mx.symbol.Pooling(data=loc, kernel=(2, 2), stride=(2, 2), pool_type='max')
+ loc = mx.symbol.Convolution(data=loc, num_filter=60, kernel=(3, 3), stride=(1,1), pad=(1, 1))
+ loc = mx.symbol.Activation(data = loc, act_type='relu')
+ loc = mx.symbol.Pooling(data=loc, global_pool=True, kernel=(2, 2), pool_type='avg')
+ loc = mx.symbol.Flatten(data=loc)
+ loc = mx.symbol.FullyConnected(data=loc, num_hidden=6, name="stn_loc", attr=attr)
+ return loc
+
def get_mlp():
"""
multi-layer perceptron
@@ -29,13 +44,16 @@ def get_mlp():
mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')
return mlp
-def get_lenet():
+def get_lenet(add_stn=False):
"""
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick
Haffner. "Gradient-based learning applied to document recognition."
Proceedings of the IEEE (1998)
"""
data = mx.symbol.Variable('data')
+ if(add_stn):
+ data = mx.sym.SpatialTransformer(data=data, loc=get_loc(data), target_shape = (28,28),
+ transform_type="affine", sampler_type="bilinear")
# first conv
conv1 = mx.symbol.Convolution(data=data, kernel=(5,5), num_filter=20)
tanh1 = mx.symbol.Activation(data=conv1, act_type="tanh")
@@ -88,7 +106,7 @@ def get_iterator_impl(args, kv):
def parse_args():
parser = argparse.ArgumentParser(description='train an image classifer on mnist')
parser.add_argument('--network', type=str, default='mlp',
- choices = ['mlp', 'lenet'],
+ choices = ['mlp', 'lenet', 'lenet-stn'],
help = 'the cnn to use')
parser.add_argument('--data-dir', type=str, default='mnist/',
help='the input data directory')
@@ -124,6 +142,9 @@ def parse_args():
if args.network == 'mlp':
data_shape = (784, )
net = get_mlp()
+ elif args.network == 'lenet-stn':
+ data_shape = (1, 28, 28)
+ net = get_lenet(True)
else:
data_shape = (1, 28, 28)
net = get_lenet()
diff --git a/example/kaggle-ndsb1/predict_dsb.py b/example/kaggle-ndsb1/predict_dsb.py
index 9fd3c71d6bb2..5241730120c4 100644
--- a/example/kaggle-ndsb1/predict_dsb.py
+++ b/example/kaggle-ndsb1/predict_dsb.py
@@ -10,7 +10,7 @@
help='the batch size')
parser.add_argument('--data-dir', type=str, default="data48/",
help='the input data directory')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--model-prefix', type=str,default= "./models/sample_net-0",
help='the prefix of the model to load')
diff --git a/example/kaggle-ndsb1/train_dsb.py b/example/kaggle-ndsb1/train_dsb.py
index eeb57bed0a0e..6c54a0b0270e 100644
--- a/example/kaggle-ndsb1/train_dsb.py
+++ b/example/kaggle-ndsb1/train_dsb.py
@@ -27,7 +27,7 @@
help="load the model on an epoch using the model-prefix")
parser.add_argument('--batch-size', type=int, default=64,
help='the batch size')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--kv-store', type=str, default='local',
help='the kvstore type')
diff --git a/example/module/train_cifar10.py b/example/module/train_cifar10.py
index 804779dbce28..2603f9cf704f 100644
--- a/example/module/train_cifar10.py
+++ b/example/module/train_cifar10.py
@@ -15,7 +15,7 @@
help = 'the cnn to use')
parser.add_argument('--data-dir', type=str, default=default_data_dir,
help='the input data directory')
-parser.add_argument('--gpus', type=str, default='0',
+parser.add_argument('--gpus', type=str,
help='the gpus will be used, e.g "0,1,2,3"')
parser.add_argument('--num-examples', type=int, default=60000,
help='the number of training examples')
@@ -52,7 +52,7 @@ def _download(data_dir):
os.chdir(data_dir)
if (not os.path.exists('train.rec')) or \
(not os.path.exists('test.rec')) :
- os.system("wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip")
+ os.system("wget http://data.dmlc.ml/mxnet/data/cifar10.zip")
os.system("unzip -u cifar10.zip")
os.system("mv cifar/* .; rm -rf cifar; rm cifar10.zip")
os.chdir(cwd)
diff --git a/example/neural-style/README.md b/example/neural-style/README.md
index 50402cc4de07..31c29c3513e3 100644
--- a/example/neural-style/README.md
+++ b/example/neural-style/README.md
@@ -21,3 +21,5 @@ It takes 30 secs for a Titan X to generate the above 600x400 image.
* The current implementation is based the
[torch implementation](https://github.com/jcjohnson/neural-style). But we may
change it dramatically in the near future.
+
+* We will release multi-GPU version soon.
diff --git a/example/neural-style/end_to_end/README.md b/example/neural-style/end_to_end/README.md
new file mode 100644
index 000000000000..2f19bf51abe4
--- /dev/null
+++ b/example/neural-style/end_to_end/README.md
@@ -0,0 +1,20 @@
+# End to End Neural Art
+
+This is an implementation of blog: [http://dmlc.ml/mxnet/2016/06/20/end-to-end-neural-style.html](http://dmlc.ml/mxnet/2016/06/20/end-to-end-neural-style.html)
+
+
+We will release a Multi-GPU training code soon.
+
+## How to use
+
+
+1. First use `download.sh` to download pre-trained model and sample inputs
+
+2. Then prepare training dataset according to the blog
+
+3. Modify [boost_train.py](boost_train.py)
+
+## Pretrained Model
+
+Weight [https://github.com/dmlc/web-data/raw/master/mxnet/art/model.zip](https://github.com/dmlc/web-data/raw/master/mxnet/art/model.zip)
+Inference [boost_inference.py](boost_inference.py)
diff --git a/example/neural-style/end_to_end/basic.py b/example/neural-style/end_to_end/basic.py
new file mode 100644
index 000000000000..ed9d3f601554
--- /dev/null
+++ b/example/neural-style/end_to_end/basic.py
@@ -0,0 +1,162 @@
+import sys
+sys.path.insert(0, "../../mxnet/python/")
+
+import mxnet as mx
+import numpy as np
+import model_vgg19 as vgg
+
+class PretrainedInit(mx.init.Initializer):
+ def __init__(self, prefix, params, verbose=False):
+ self.prefix_len = len(prefix) + 1
+ self.verbose = verbose
+ self.arg_params = {k : v for k, v in params.items() if k.startswith("arg:")}
+ self.aux_params = {k : v for k, v in params.items() if k.startswith("aux:")}
+ self.arg_names = set([k[4:] for k in self.arg_params.keys()])
+ self.aux_names = set([k[4:] for k in self.aux_params.keys()])
+
+ def __call__(self, name, arr):
+ key = name[self.prefix_len:]
+ if key in self.arg_names:
+ if self.verbose:
+ print("Init %s" % name)
+ self.arg_params["arg:" + key].copyto(arr)
+        elif key in self.aux_names:
+ if self.verbose:
+ print("Init %s" % name)
+ self.aux_params["aux:" + key].copyto(arr)
+ else:
+ print("Unknown params: %s, init with 0" % name)
+ arr[:] = 0.
+
+
+def style_gram_symbol(input_shape, style):
+ _, output_shapes, _ = style.infer_shape(**input_shape)
+ gram_list = []
+ grad_scale = []
+ for i in range(len(style.list_outputs())):
+ shape = output_shapes[i]
+ x = mx.sym.Reshape(style[i], shape=(int(shape[1]), int(np.prod(shape[2:]))))
+ # use fully connected to quickly do dot(x, x^T)
+ gram = mx.sym.FullyConnected(x, x, no_bias=True, num_hidden=shape[1])
+ gram_list.append(gram)
+ grad_scale.append(np.prod(shape[1:]) * shape[1])
+ return mx.sym.Group(gram_list), grad_scale
+
+
+def get_loss(gram, content):
+ gram_loss = []
+ for i in range(len(gram.list_outputs())):
+ gvar = mx.sym.Variable("target_gram_%d" % i)
+ gram_loss.append(mx.sym.sum(mx.sym.square(gvar - gram[i])))
+ cvar = mx.sym.Variable("target_content")
+ content_loss = mx.sym.sum(mx.sym.square(cvar - content))
+ return mx.sym.Group(gram_loss), content_loss
+
+def get_content_module(prefix, dshape, ctx, params):
+ sym = vgg.get_vgg_symbol(prefix, True)
+ init = PretrainedInit(prefix, params)
+ mod = mx.mod.Module(symbol=sym,
+ data_names=("%s_data" % prefix,),
+ label_names=None,
+ context=ctx)
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=False)
+ mod.init_params(init)
+ return mod
+
+def get_style_module(prefix, dshape, ctx, params):
+ input_shape = {"%s_data" % prefix : dshape}
+ style, content = vgg.get_vgg_symbol(prefix)
+ gram, gscale = style_gram_symbol(input_shape, style)
+ init = PretrainedInit(prefix, params)
+ mod = mx.mod.Module(symbol=gram,
+ data_names=("%s_data" % prefix,),
+ label_names=None,
+ context=ctx)
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=False)
+ mod.init_params(init)
+ return mod
+
+
+def get_loss_module(prefix, dshape, ctx, params):
+ input_shape = {"%s_data" % prefix : dshape}
+ style, content = vgg.get_vgg_symbol(prefix)
+ gram, gscale = style_gram_symbol(input_shape, style)
+ style_loss, content_loss = get_loss(gram, content)
+ sym = mx.sym.Group([style_loss, content_loss])
+ init = PretrainedInit(prefix, params)
+ gram_size = len(gram.list_outputs())
+ mod = mx.mod.Module(symbol=sym,
+ data_names=("%s_data" % prefix,),
+ label_names=None,
+ context=ctx)
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)],
+ for_training=True, inputs_need_grad=True)
+ mod.init_params(init)
+ return mod, gscale
+
+
+
+if __name__ == "__main__":
+ from data_processing import PreprocessContentImage, PreprocessStyleImage
+ from data_processing import PostprocessImage, SaveImage
+ vgg_params = mx.nd.load("./model/vgg19.params")
+ style_weight = 2
+ content_weight = 10
+ long_edge = 384
+ content_np = PreprocessContentImage("./input/IMG_4343.jpg", long_edge)
+ style_np = PreprocessStyleImage("./input/starry_night.jpg", shape=content_np.shape)
+ dshape = content_np.shape
+ ctx = mx.gpu()
+ # style
+ style_mod = get_style_module("style", dshape, ctx, vgg_params)
+ style_mod.forward(mx.io.DataBatch([mx.nd.array(style_np)], [0]), is_train=False)
+ style_array = [arr.copyto(mx.cpu()) for arr in style_mod.get_outputs()]
+ del style_mod
+ # content
+ content_mod = get_content_module("content", dshape, ctx, vgg_params)
+ content_mod.forward(mx.io.DataBatch([mx.nd.array(content_np)], [0]), is_train=False)
+ content_array = content_mod.get_outputs()[0].copyto(mx.cpu())
+ del content_mod
+ # loss
+ mod, gscale = get_loss_module("loss", dshape, ctx, vgg_params)
+ extra_args = {"target_gram_%d" % i : style_array[i] for i in range(len(style_array))}
+ extra_args["target_content"] = content_array
+ mod.set_params(extra_args, {}, True, True)
+ grad_array = []
+ for i in range(len(style_array)):
+ grad_array.append(mx.nd.ones((1,), ctx) * (float(style_weight) / gscale[i]))
+ grad_array.append(mx.nd.ones((1,), ctx) * (float(content_weight)))
+ # train
+ img = mx.nd.zeros(content_np.shape, ctx=ctx)
+ img[:] = mx.rnd.uniform(-0.1, 0.1, img.shape)
+ lr = mx.lr_scheduler.FactorScheduler(step=80, factor=.9)
+ optimizer = mx.optimizer.SGD(
+ learning_rate = 0.001,
+ wd = 0.0005,
+ momentum=0.9,
+ lr_scheduler = lr)
+ optim_state = optimizer.create_state(0, img)
+
+ old_img = img.copyto(ctx)
+ clip_norm = 1 * np.prod(img.shape)
+
+ import logging
+ for e in range(800):
+ mod.forward(mx.io.DataBatch([img], [0]), is_train=True)
+ mod.backward(grad_array)
+ data_grad = mod.get_input_grads()[0]
+ gnorm = mx.nd.norm(data_grad).asscalar()
+ if gnorm > clip_norm:
+ print("Data Grad: ", gnorm / clip_norm)
+ data_grad[:] *= clip_norm / gnorm
+
+ optimizer.update(0, img, data_grad, optim_state)
+ new_img = img
+ eps = (mx.nd.norm(old_img - new_img) / mx.nd.norm(new_img)).asscalar()
+ old_img = new_img.copyto(ctx)
+ logging.info('epoch %d, relative change %f', e, eps)
+ if (e+1) % 50 == 0:
+ SaveImage(new_img.asnumpy(), 'output/tmp_'+str(e+1)+'.jpg')
+
+ SaveImage(new_img.asnumpy(), "./output/out.jpg")
+
diff --git a/example/neural-style/end_to_end/boost_inference.py b/example/neural-style/end_to_end/boost_inference.py
new file mode 100644
index 000000000000..72427bedc7a6
--- /dev/null
+++ b/example/neural-style/end_to_end/boost_inference.py
@@ -0,0 +1,38 @@
+import sys
+sys.path.insert(0, "../mxnet/python")
+
+import mxnet as mx
+import numpy as np
+
+#import basic
+import data_processing
+import gen_v3
+import gen_v4
+
+dshape = (1, 3, 480, 640)
+clip_norm = 1.0 * np.prod(dshape)
+model_prefix = "./model/"
+ctx = mx.gpu(0)
+
+
+
+# generator
+gens = [gen_v4.get_module("g0", dshape, ctx),
+ gen_v3.get_module("g1", dshape, ctx),
+ gen_v3.get_module("g2", dshape, ctx),
+ gen_v4.get_module("g3", dshape, ctx)]
+for i in range(len(gens)):
+ gens[i].load_params("./model/%d/v3_0002-0026000.params" % i)
+
+content_np = data_processing.PreprocessContentImage("../IMG_4343.jpg", min(dshape[2:]), dshape)
+data = [mx.nd.array(content_np)]
+for i in range(len(gens)):
+ gens[i].forward(mx.io.DataBatch([data[-1]], [0]), is_train=False)
+ new_img = gens[i].get_outputs()[0]
+ data.append(new_img.copyto(mx.cpu()))
+ data_processing.SaveImage(new_img.asnumpy(), "out_%d.jpg" % i)
+
+
+import os
+os.system("rm -rf out.zip")
+os.system("zip out.zip out_*")
diff --git a/example/neural-style/end_to_end/boost_train.py b/example/neural-style/end_to_end/boost_train.py
new file mode 100644
index 000000000000..9100cc1875a2
--- /dev/null
+++ b/example/neural-style/end_to_end/boost_train.py
@@ -0,0 +1,147 @@
+import sys
+sys.path.insert(0, "../../mxnet/python")
+
+import mxnet as mx
+import numpy as np
+
+import basic
+import data_processing
+import gen_v3
+import gen_v4
+
+# params
+vgg_params = mx.nd.load("./vgg19.params")
+style_weight = 1.2
+content_weight = 10
+dshape = (1, 3, 384, 384)
+clip_norm = 0.05 * np.prod(dshape)
+model_prefix = "v3"
+ctx = mx.gpu(0)
+
+# init style
+style_np = data_processing.PreprocessStyleImage("../starry_night.jpg", shape=dshape)
+style_mod = basic.get_style_module("style", dshape, ctx, vgg_params)
+style_mod.forward(mx.io.DataBatch([mx.nd.array(style_np)], [0]), is_train=False)
+style_array = [arr.copyto(mx.cpu()) for arr in style_mod.get_outputs()]
+del style_mod
+
+# content
+content_mod = basic.get_content_module("content", dshape, ctx, vgg_params)
+
+# loss
+loss, gscale = basic.get_loss_module("loss", dshape, ctx, vgg_params)
+extra_args = {"target_gram_%d" % i : style_array[i] for i in range(len(style_array))}
+loss.set_params(extra_args, {}, True, True)
+grad_array = []
+for i in range(len(style_array)):
+ grad_array.append(mx.nd.ones((1,), ctx) * (float(style_weight) / gscale[i]))
+grad_array.append(mx.nd.ones((1,), ctx) * (float(content_weight)))
+
+# generator
+gens = [gen_v4.get_module("g0", dshape, ctx),
+ gen_v3.get_module("g1", dshape, ctx),
+ gen_v3.get_module("g2", dshape, ctx),
+ gen_v4.get_module("g3", dshape, ctx)]
+for gen in gens:
+ gen.init_optimizer(
+ optimizer='sgd',
+ optimizer_params={
+ 'learning_rate': 1e-4,
+ 'momentum' : 0.9,
+ 'wd': 5e-3,
+ 'clip_gradient' : 5.0
+ })
+
+
+# tv-loss
+def get_tv_grad_executor(img, ctx, tv_weight):
+    """create TV gradient executor with its input bound to img
+ """
+ if tv_weight <= 0.0:
+ return None
+ nchannel = img.shape[1]
+ simg = mx.sym.Variable("img")
+ skernel = mx.sym.Variable("kernel")
+ channels = mx.sym.SliceChannel(simg, num_outputs=nchannel)
+ out = mx.sym.Concat(*[
+ mx.sym.Convolution(data=channels[i], weight=skernel,
+ num_filter=1,
+ kernel=(3, 3), pad=(1,1),
+ no_bias=True, stride=(1,1))
+ for i in range(nchannel)])
+ kernel = mx.nd.array(np.array([[0, -1, 0],
+ [-1, 4, -1],
+ [0, -1, 0]])
+ .reshape((1, 1, 3, 3)),
+ ctx) / 8.0
+ out = out * tv_weight
+ return out.bind(ctx, args={"img": img,
+ "kernel": kernel})
+tv_weight = 1e-2
+
+start_epoch = 0
+end_epoch = 3
+
+
+# data
+import os
+import random
+import logging
+
+data_root = "../data/"
+file_list = os.listdir(data_root)
+num_image = len(file_list)
+logging.info("Dataset size: %d" % num_image)
+
+
+# train
+
+for i in range(start_epoch, end_epoch):
+ random.shuffle(file_list)
+ for idx in range(num_image):
+ loss_grad_array = []
+ data_array = []
+ path = data_root + file_list[idx]
+ content_np = data_processing.PreprocessContentImage(path, min(dshape[2:]), dshape)
+ data = mx.nd.array(content_np)
+ data_array.append(data)
+ # get content
+ content_mod.forward(mx.io.DataBatch([data], [0]), is_train=False)
+ content_array = content_mod.get_outputs()[0].copyto(mx.cpu())
+ # set target content
+ loss.set_params({"target_content" : content_array}, {}, True, True)
+ # gen_forward
+ for k in range(len(gens)):
+ gens[k].forward(mx.io.DataBatch([data_array[-1]], [0]), is_train=True)
+ data_array.append(gens[k].get_outputs()[0].copyto(mx.cpu()))
+ # loss forward
+ loss.forward(mx.io.DataBatch([data_array[-1]], [0]), is_train=True)
+ loss.backward(grad_array)
+ grad = loss.get_input_grads()[0]
+ loss_grad_array.append(grad.copyto(mx.cpu()))
+ grad = mx.nd.zeros(data.shape)
+ for k in range(len(gens) - 1, -1, -1):
+ tv_grad_executor = get_tv_grad_executor(gens[k].get_outputs()[0],
+ ctx, tv_weight)
+ tv_grad_executor.forward()
+
+ grad[:] += loss_grad_array[k] + tv_grad_executor.outputs[0].copyto(mx.cpu())
+ gnorm = mx.nd.norm(grad).asscalar()
+ if gnorm > clip_norm:
+ grad[:] *= clip_norm / gnorm
+
+ gens[k].backward([grad])
+ gens[k].update()
+ if idx % 20 == 0:
+ logging.info("Epoch %d: Image %d" % (i, idx))
+ for k in range(len(gens)):
+ logging.info("Data Norm :%.5f" %\
+ (mx.nd.norm(gens[k].get_input_grads()[0]).asscalar() / np.prod(dshape)))
+ if idx % 1000 == 0:
+ for k in range(len(gens)):
+ gens[k].save_params("./model/%d/%s_%04d-%07d.params" % (k, model_prefix, i, idx))
+
+
+
+
+
diff --git a/example/neural-style/end_to_end/data_processing.py b/example/neural-style/end_to_end/data_processing.py
new file mode 100644
index 000000000000..5469fb008d7a
--- /dev/null
+++ b/example/neural-style/end_to_end/data_processing.py
@@ -0,0 +1,67 @@
+import numpy as np
+from skimage import io, transform
+from skimage.restoration import denoise_tv_chambolle
+import logging
+import random
+FORMAT = '%(asctime)-15s %(message)s'
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+
+def PreprocessContentImage(path, short_edge, dshape=None):
+ img = io.imread(path)
+ #logging.info("load the content image, size = %s", img.shape[:2])
+ factor = float(short_edge) / min(img.shape[:2])
+ new_size = (int(img.shape[0] * factor), int(img.shape[1] * factor))
+ resized_img = transform.resize(img, new_size)
+ sample = np.asarray(resized_img) * 256
+ if dshape != None:
+ # random crop
+ xx = int((sample.shape[0] - dshape[2]))
+ yy = int((sample.shape[1] - dshape[3]))
+ xstart = random.randint(0, xx)
+ ystart = random.randint(0, yy)
+ xend = xstart + dshape[2]
+ yend = ystart + dshape[3]
+ sample = sample[xstart:xend, ystart:yend, :]
+
+ # swap axes to make image from (224, 224, 3) to (3, 224, 224)
+ sample = np.swapaxes(sample, 0, 2)
+ sample = np.swapaxes(sample, 1, 2)
+ # sub mean
+ sample[0, :] -= 123.68
+ sample[1, :] -= 116.779
+ sample[2, :] -= 103.939
+ #logging.info("resize the content image to %s", sample.shape)
+ return np.resize(sample, (1, 3, sample.shape[1], sample.shape[2]))
+
+def PreprocessStyleImage(path, shape):
+ img = io.imread(path)
+ resized_img = transform.resize(img, (shape[2], shape[3]))
+ sample = np.asarray(resized_img) * 256
+ sample = np.swapaxes(sample, 0, 2)
+ sample = np.swapaxes(sample, 1, 2)
+
+ sample[0, :] -= 123.68
+ sample[1, :] -= 116.779
+ sample[2, :] -= 103.939
+ return np.resize(sample, (1, 3, sample.shape[1], sample.shape[2]))
+
+def PostprocessImage(img):
+ img = np.resize(img, (3, img.shape[2], img.shape[3]))
+ img[0, :] += 123.68
+ img[1, :] += 116.779
+ img[2, :] += 103.939
+ img = np.swapaxes(img, 1, 2)
+ img = np.swapaxes(img, 0, 2)
+ img = np.clip(img, 0, 255)
+ return img.astype('uint8')
+
+def SaveImage(img, filename, remove_noise=0.02):
+ logging.info('save output to %s', filename)
+ out = PostprocessImage(img)
+ if remove_noise != 0.0:
+ out = denoise_tv_chambolle(out, weight=remove_noise, multichannel=True)
+ io.imsave(filename, out)
+
+
+
+
diff --git a/example/neural-style/end_to_end/gen_v3.py b/example/neural-style/end_to_end/gen_v3.py
new file mode 100644
index 000000000000..dbc83b1ea004
--- /dev/null
+++ b/example/neural-style/end_to_end/gen_v3.py
@@ -0,0 +1,72 @@
+
+# coding: utf-8
+
+# In[1]:
+
+import sys
+sys.path.insert(0, "../../mxnet/python")
+
+
+# In[2]:
+
+import mxnet as mx
+import numpy as np
+
+
+def Conv(data, num_filter, kernel=(5, 5), pad=(2, 2), stride=(2, 2)):
+ sym = mx.sym.Convolution(data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=False)
+ sym = mx.sym.BatchNorm(sym, fix_gamma=False)
+ sym = mx.sym.LeakyReLU(sym, act_type="leaky")
+ return sym
+
+
+def Deconv(data, num_filter, im_hw, kernel=(7, 7), pad=(2, 2), stride=(2, 2), crop=True, out=False):
+ sym = mx.sym.Deconvolution(data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True)
+ if crop:
+ sym = mx.sym.Crop(sym, offset=(1, 1), h_w=im_hw, num_args=1)
+ sym = mx.sym.BatchNorm(sym, fix_gamma=False)
+ if out == False:
+ sym = mx.sym.LeakyReLU(sym, act_type="leaky")
+ else:
+ sym = mx.sym.Activation(sym, act_type="tanh")
+ return sym
+
+# In[70]:
+
+def get_generator(prefix, im_hw):
+ data = mx.sym.Variable("%s_data" % prefix)
+ conv1 = Conv(data, 64) # 192
+ conv1_1 = Conv(conv1, 48, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
+ conv2 = Conv(conv1_1, 128) # 96
+ conv2_1 = Conv(conv2, 96, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
+ conv3 = Conv(conv2_1, 256) # 48
+ conv3_1 = Conv(conv3, 192, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
+ deconv1 = Deconv(conv3_1, 128, (int(im_hw[0] / 4), int(im_hw[1] / 4))) + conv2
+ conv4_1 = Conv(deconv1, 160, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
+ deconv2 = Deconv(conv4_1, 64, (int(im_hw[0] / 2), int(im_hw[1] / 2))) + conv1
+ conv5_1 = Conv(deconv2, 96, kernel=(3, 3), pad=(1, 1), stride=(1, 1))
+ deconv3 = Deconv(conv5_1, 3, im_hw, kernel=(8, 8), pad=(3, 3), out=True, crop=False)
+ raw_out = (deconv3 * 128) + 128
+ norm = mx.sym.SliceChannel(raw_out, num_outputs=3)
+ r_ch = norm[0] - 123.68
+ g_ch = norm[1] - 116.779
+ b_ch = norm[2] - 103.939
+ norm_out = 0.4 * mx.sym.Concat(*[r_ch, g_ch, b_ch]) + 0.6 * data
+ return norm_out
+
+def get_module(prefix, dshape, ctx, is_train=True):
+ sym = get_generator(prefix, dshape[-2:])
+ mod = mx.mod.Module(symbol=sym,
+ data_names=("%s_data" % prefix,),
+ label_names=None,
+ context=ctx)
+ if is_train:
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=True, inputs_need_grad=True)
+ else:
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=False, inputs_need_grad=False)
+ mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
+ return mod
+
+
+
+
diff --git a/example/neural-style/end_to_end/gen_v4.py b/example/neural-style/end_to_end/gen_v4.py
new file mode 100644
index 000000000000..379e904b9690
--- /dev/null
+++ b/example/neural-style/end_to_end/gen_v4.py
@@ -0,0 +1,86 @@
+
+# coding: utf-8
+
+# In[1]:
+
+import sys
+sys.path.insert(0, "../mxnet/python")
+
+
+# In[2]:
+
+import mxnet as mx
+import numpy as np
+
+
+def Conv(data, num_filter, kernel=(5, 5), pad=(2, 2), stride=(2, 2)):
+ sym = mx.sym.Convolution(data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=False)
+ sym = mx.sym.BatchNorm(sym, fix_gamma=False)
+ sym = mx.sym.LeakyReLU(sym, act_type="leaky")
+ return sym
+
+
+def Deconv(data, num_filter, kernel=(6, 6), pad=(2, 2), stride=(2, 2), out=False):
+ sym = mx.sym.Deconvolution(data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True)
+ sym = mx.sym.BatchNorm(sym, fix_gamma=False)
+ if out == False:
+ sym = mx.sym.LeakyReLU(sym, act_type="leaky")
+ else:
+ sym = mx.sym.Activation(sym, act_type="tanh")
+ return sym
+
+# In[70]:
+
+def get_generator(prefix, im_hw):
+ data = mx.sym.Variable("%s_data" % prefix)
+
+ conv1_1 = mx.sym.Convolution(data, num_filter=48, kernel=(5, 5), pad=(2, 2), no_bias=False)
+ conv1_1 = mx.sym.BatchNorm(conv1_1, fix_gamma=False)
+ conv1_1 = mx.sym.LeakyReLU(conv1_1, act_type="leaky")
+
+ conv2_1 = mx.sym.Convolution(conv1_1, num_filter=32, kernel=(5, 5), pad=(2, 2), no_bias=False)
+ conv2_1 = mx.sym.BatchNorm(conv2_1, fix_gamma=False)
+ conv2_1 = mx.sym.LeakyReLU(conv2_1, act_type="leaky")
+
+ conv3_1 = mx.sym.Convolution(conv2_1, num_filter=64, kernel=(3, 3), pad=(1, 1), no_bias=False)
+ conv3_1 = mx.sym.BatchNorm(conv3_1, fix_gamma=False)
+ conv3_1 = mx.sym.LeakyReLU(conv3_1, act_type="leaky")
+
+ conv4_1 = mx.sym.Convolution(conv3_1, num_filter=32, kernel=(5, 5), pad=(2, 2), no_bias=False)
+ conv4_1 = mx.sym.BatchNorm(conv4_1, fix_gamma=False)
+ conv4_1 = mx.sym.LeakyReLU(conv4_1, act_type="leaky")
+
+ conv5_1 = mx.sym.Convolution(conv4_1, num_filter=48, kernel=(5, 5), pad=(2, 2), no_bias=False)
+ conv5_1 = mx.sym.BatchNorm(conv5_1, fix_gamma=False)
+ conv5_1 = mx.sym.LeakyReLU(conv5_1, act_type="leaky")
+
+ conv6_1 = mx.sym.Convolution(conv5_1, num_filter=32, kernel=(5, 5), pad=(2, 2), no_bias=True)
+ conv6_1 = mx.sym.BatchNorm(conv6_1, fix_gamma=False)
+ conv6_1 = mx.sym.LeakyReLU(conv6_1, act_type="leaky")
+
+ out = mx.sym.Convolution(conv6_1, num_filter=3, kernel=(3, 3), pad=(1, 1), no_bias=True)
+ out = mx.sym.BatchNorm(out, fix_gamma=False)
+ out = mx.sym.Activation(data=out, act_type="tanh")
+ raw_out = (out * 128) + 128
+ norm = mx.sym.SliceChannel(raw_out, num_outputs=3)
+ r_ch = norm[0] - 123.68
+ g_ch = norm[1] - 116.779
+ b_ch = norm[2] - 103.939
+ norm_out = 0.4 * mx.sym.Concat(*[r_ch, g_ch, b_ch]) + 0.6 * data
+ return norm_out
+
+def get_module(prefix, dshape, ctx, is_train=True):
+ sym = get_generator(prefix, dshape[-2:])
+ mod = mx.mod.Module(symbol=sym,
+ data_names=("%s_data" % prefix,),
+ label_names=None,
+ context=ctx)
+ if is_train:
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=True, inputs_need_grad=True)
+ else:
+ mod.bind(data_shapes=[("%s_data" % prefix, dshape)], for_training=False, inputs_need_grad=False)
+ mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
+ return mod
+
+
+
diff --git a/example/neural-style/end_to_end/model_vgg19.py b/example/neural-style/end_to_end/model_vgg19.py
new file mode 100644
index 000000000000..6e287b55b2fa
--- /dev/null
+++ b/example/neural-style/end_to_end/model_vgg19.py
@@ -0,0 +1,96 @@
+import mxnet as mx
+import os, sys
+from collections import namedtuple
+
+ConvExecutor = namedtuple('ConvExecutor', ['executor', 'data', 'data_grad', 'style', 'content', 'arg_dict'])
+
+def get_vgg_symbol(prefix, content_only=False):
+ # declare symbol
+ data = mx.sym.Variable("%s_data" % prefix)
+ conv1_1 = mx.symbol.Convolution(name='%s_conv1_1' % prefix, data=data , num_filter=64, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu1_1 = mx.symbol.Activation(data=conv1_1 , act_type='relu')
+ conv1_2 = mx.symbol.Convolution(name='%s_conv1_2' % prefix, data=relu1_1 , num_filter=64, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu1_2 = mx.symbol.Activation(data=conv1_2 , act_type='relu')
+ pool1 = mx.symbol.Pooling(data=relu1_2 , pad=(0,0), kernel=(2,2), stride=(2,2), pool_type='avg')
+ conv2_1 = mx.symbol.Convolution(name='%s_conv2_1' % prefix, data=pool1 , num_filter=128, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu2_1 = mx.symbol.Activation(data=conv2_1 , act_type='relu')
+ conv2_2 = mx.symbol.Convolution(name='%s_conv2_2' % prefix, data=relu2_1 , num_filter=128, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu2_2 = mx.symbol.Activation(data=conv2_2 , act_type='relu')
+ pool2 = mx.symbol.Pooling(data=relu2_2 , pad=(0,0), kernel=(2,2), stride=(2,2), pool_type='avg')
+ conv3_1 = mx.symbol.Convolution(name='%s_conv3_1' % prefix, data=pool2 , num_filter=256, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu3_1 = mx.symbol.Activation(data=conv3_1 , act_type='relu')
+ conv3_2 = mx.symbol.Convolution(name='%s_conv3_2' % prefix, data=relu3_1 , num_filter=256, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu3_2 = mx.symbol.Activation(data=conv3_2 , act_type='relu')
+ conv3_3 = mx.symbol.Convolution(name='%s_conv3_3' % prefix, data=relu3_2 , num_filter=256, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu3_3 = mx.symbol.Activation(data=conv3_3 , act_type='relu')
+ conv3_4 = mx.symbol.Convolution(name='%s_conv3_4' % prefix, data=relu3_3 , num_filter=256, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu3_4 = mx.symbol.Activation(data=conv3_4 , act_type='relu')
+ pool3 = mx.symbol.Pooling(data=relu3_4 , pad=(0,0), kernel=(2,2), stride=(2,2), pool_type='avg')
+ conv4_1 = mx.symbol.Convolution(name='%s_conv4_1' % prefix, data=pool3 , num_filter=512, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu4_1 = mx.symbol.Activation(data=conv4_1 , act_type='relu')
+ conv4_2 = mx.symbol.Convolution(name='%s_conv4_2' % prefix, data=relu4_1 , num_filter=512, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu4_2 = mx.symbol.Activation(data=conv4_2 , act_type='relu')
+ conv4_3 = mx.symbol.Convolution(name='%s_conv4_3' % prefix, data=relu4_2 , num_filter=512, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu4_3 = mx.symbol.Activation(data=conv4_3 , act_type='relu')
+ conv4_4 = mx.symbol.Convolution(name='%s_conv4_4' % prefix, data=relu4_3 , num_filter=512, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu4_4 = mx.symbol.Activation(data=conv4_4 , act_type='relu')
+ pool4 = mx.symbol.Pooling(data=relu4_4 , pad=(0,0), kernel=(2,2), stride=(2,2), pool_type='avg')
+ conv5_1 = mx.symbol.Convolution(name='%s_conv5_1' % prefix, data=pool4 , num_filter=512, pad=(1,1), kernel=(3,3), stride=(1,1), workspace=1024)
+ relu5_1 = mx.symbol.Activation(data=conv5_1 , act_type='relu')
+
+
+ if content_only:
+ return relu4_2
+ # style and content layers
+ style = mx.sym.Group([relu1_1, relu2_1, relu3_1, relu4_1, relu5_1])
+ content = mx.sym.Group([relu4_2])
+ return style, content
+
+
+def get_executor_with_style(style, content, input_size, ctx):
+ out = mx.sym.Group([style, content])
+ # make executor
+ arg_shapes, output_shapes, aux_shapes = out.infer_shape(data=(1, 3, input_size[0], input_size[1]))
+ arg_names = out.list_arguments()
+ arg_dict = dict(zip(arg_names, [mx.nd.zeros(shape, ctx=ctx) for shape in arg_shapes]))
+ grad_dict = {"data": arg_dict["data"].copyto(ctx)}
+ # init with pretrained weight
+ pretrained = mx.nd.load("./model/vgg19.params")
+ for name in arg_names:
+ if name == "data":
+ continue
+ key = "arg:" + name
+ if key in pretrained:
+ pretrained[key].copyto(arg_dict[name])
+ else:
+ print("Skip argument %s" % name)
+ executor = out.bind(ctx=ctx, args=arg_dict, args_grad=grad_dict, grad_req="write")
+ return ConvExecutor(executor=executor,
+ data=arg_dict["data"],
+ data_grad=grad_dict["data"],
+ style=executor.outputs[:-1],
+ content=executor.outputs[-1],
+ arg_dict=arg_dict)
+
+def get_executor_content(content, input_size, ctx):
+ arg_shapes, output_shapes, aux_shapes = content.infer_shape(data=(1, 3, input_size[0], input_size[1]))
+    arg_names = content.list_arguments()
+ arg_dict = dict(zip(arg_names, [mx.nd.zeros(shape, ctx=ctx) for shape in arg_shapes]))
+ pretrained = mx.nd.load("./model/vgg19.params")
+ for name in arg_names:
+ if name == "data":
+ continue
+ key = "arg:" + name
+ if key in pretrained:
+ pretrained[key].copyto(arg_dict[name])
+ else:
+ print("Skip argument %s" % name)
+    executor = content.bind(ctx=ctx, args=arg_dict, args_grad=None, grad_req="null")
+ return ConvExecutor(executor=executor,
+ data=arg_dict["data"],
+ data_grad=None,
+ style=None,
+ content=executor.outputs[0],
+ arg_dict=arg_dict)
+
+
diff --git a/example/notebooks/predict-with-pretrained-model.ipynb b/example/notebooks/predict-with-pretrained-model.ipynb
index 73ba99071890..f85157dc714f 100644
--- a/example/notebooks/predict-with-pretrained-model.ipynb
+++ b/example/notebooks/predict-with-pretrained-model.ipynb
@@ -16,7 +16,7 @@
"For network structure, you can visualize it in [Composite Symbol Demo](composite_symbol.ipynb)\n",
"\n",
"The pre-trained Inception-BatchNorm network is able to be downloaded from:\n",
- "[http://webdocs.cs.ualberta.ca/~bx3/data/Inception.zip](http://webdocs.cs.ualberta.ca/~bx3/data/Inception.zip)\n",
+ "[http://data.dmlc.ml/mxnet/data/Inception.zip](http://data.dmlc.ml/mxnet/data/Inception.zip)\n",
"This model achieves Top-1 Accuracy: 70% and Top-5 Accuracy: 89.9%\n",
"\n",
"Note: This network is trained by using very simple augmentation (random flip + random crop). We will release model with a little bit more augmentation (which achieves better validation score)"
diff --git a/example/rcnn/LICENSE b/example/rcnn/LICENSE
index 07b70c57b8d5..84eb07876986 100644
--- a/example/rcnn/LICENSE
+++ b/example/rcnn/LICENSE
@@ -42,6 +42,31 @@ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
+Faster R-CNN
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Microsoft Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+
Caffe
COPYRIGHT
diff --git a/example/rcnn/README.md b/example/rcnn/README.md
index e35d09cb92e7..60f5527cb907 100644
--- a/example/rcnn/README.md
+++ b/example/rcnn/README.md
@@ -1,4 +1,10 @@
-# Fast R-CNN in MXNet
+# Faster R-CNN in MXNet with distributed implementation and data parallelization
+
+Region Proposal Network solves object detection as a regression problem
+from the objectness perspective. Bounding boxes are predicted by applying
+learned bounding box deltas to base boxes, namely anchor boxes across
+different positions in feature maps. Training process directly learns a
+mapping from raw image intensities to bounding box transformation targets.
Fast R-CNN treats general object detection as a classification problem and
bounding box prediction as a regression problem. Classifying cropped region
@@ -7,132 +13,67 @@ detection results. Cropping feature maps instead of image input accelerates
computation utilizing shared convolution maps. Bounding box displacements
are simultaneously learned in the training process.
+Faster R-CNN utilizes an alternating optimization training process between RPN
+and Fast R-CNN. Fast R-CNN weights are used to initiate RPN for training.
+
## Getting Started
+* Install python packages `easydict`, `cv2`, `matplotlib`. MXNet requires `numpy`.
+* Install MXNet with version no later than Commit 8a3424e, preferably the latest master.
+ Follow the instructions at http://mxnet.readthedocs.io/en/latest/how_to/build.html. Install the python interface.
+* Try out detection result by running `python demo.py --prefix final --epoch 0 --image myimage.jpg --gpu 0`.
+ Suppose you have downloaded pretrained network and place the extracted file `final-0000.params` in this folder and there is an image named `myimage.jpg`.
-* MXNet with `ROIPooling` and `smooth_l1` operators are required
-* Download data and place them to `data` folder according to `Data Folder Structure`.
- You might want to create a symbolic link to VOCdevkit folder
-```
-Pascal VOCdevkit
-http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
-http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
-http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
-Ross's precomputed object proposals
-https://github.com/rbgirshick/fast-rcnn/blob/master/data/scripts/fetch_selective_search_data.sh
-```
-* Data Folder Structure (suppose root is `data`)
-```
-demo
-selective_search_data
-cache (created by imdb)
--- name + source + roidb.pkl (create by imdb)
--- name (created by detection and evaluation)
-VOCdevkit
--- VOC + year (JPEG images and annotations)
--- results (created by evaluation)
----- VOC + year
------- main
--------- comp4_det_val_aeroplane.txt
-```
+## Training and Testing Faster R-CNN
+* Install additional python package `scipy`.
+* Download Pascal VOC data and place them to `data` folder according to `Data Folder Structure`.
+ You might want to create a symbolic link to VOCdevkit folder by `ln -s /path/to/your/VOCdevkit data/VOCdevkit`.
* Download VGG16 pretrained model, use `mxnet/tools/caffe_converter` to convert it,
- rename to `vgg16-symbol.json` and `vgg16-0001.params` and place it in `model` folder
-* Download 'demo' data and put it in `data/demo` from
-```
-https://github.com/rbgirshick/fast-rcnn/tree/master/data/demo
-```
-
-## Training
-* Start training by run `python train.py`. Variable args can be found by run
-`python train.py --help`.
-* Training can be done in cpu, modify `train.py` accordingly.
-* Training can be done in multiple gpus.
-```
-usage: train.py [-h] [--image_set IMAGE_SET] [--year YEAR]
- [--root_path ROOT_PATH] [--devkit_path DEVKIT_PATH]
- [--pretrained PRETRAINED] [--epoch EPOCH] [--prefix PREFIX]
- [--gpus GPU_ID] [--begin_epoch BEGIN_EPOCH]
- [--end_epoch END_EPOCH] [--frequent FREQUENT]
-
-Train a Fast R-CNN network
-
-optional arguments:
- -h, --help show this help message and exit
- --image_set IMAGE_SET
- can be trainval or train
- --year YEAR can be 2007, 2010, 2012
- --root_path ROOT_PATH
- output data folder
- --devkit_path DEVKIT_PATH
- VOCdevkit path
- --pretrained PRETRAINED
- pretrained model prefix
- --epoch EPOCH epoch of pretrained model
- --prefix PREFIX new model prefix
- --gpus GPU_ID GPU devices to train with
- --begin_epoch BEGIN_EPOCH
- begin epoch of training
- --end_epoch END_EPOCH
- end epoch of training
- --frequent FREQUENT frequency of logging
- --kv_store KV_STORE kv_store type used in multi-device training
- --work_load_list WORK_LOAD_LIST
- list of work load for different devices
-```
-- Performance in terms of training speed
+ rename to `vgg16-symbol.json` and `vgg16-0001.params` and place it in `model` folder.
+ `model` folder will be used to place model checkpoints along the training process.
+* Start training by running `python train_alternate.py` after VOCdevkit is ready.
+ A typical command would be `python train_alternate.py --gpus 0`. This will train the network on the VOC07 trainval.
+ More control of training process can be found in the argparse help accessed by `python train_alternate.py -h`.
+* Start testing by running `python test.py` after completing the training process.
+ A typical command would be `python test.py --has_rpn --prefix model/final --epoch 8`. This will test the network on the VOC07 test.
+ Adding a `--vis` will turn on visualization and `-h` will show help as in the training process.
- | GPUs | batch size | samples per second |
- | --- | --- | --- |
- | 1 | 2 | 3.02 |
- | 2 | 4 | 3.80 |
- | 4 | 8 | 5.96 |
-
-
-## Testing
-* Start testing by run `python test.py`. Variable args can be found by run
-`python test.py --help`.
-* Testing can be done in cpu, modify `test.py` accordingly.
-```
-usage: test.py [-h] [--image_set IMAGE_SET] [--year YEAR]
- [--root_path ROOT_PATH] [--devkit_path DEVKIT_PATH]
- [--prefix PREFIX] [--epoch EPOCH] [--gpu GPU_ID]
-
-Test a Fast R-CNN network
+## Training and Testing Fast R-CNN
+* Download Pascal VOC data and place them to `data` folder according to `Data Folder Structure`.
+ You might want to create a symbolic link to VOCdevkit folder by `ln -s /path/to/your/VOCdevkit data/VOCdevkit`.
+* Download precomputed selective search data and place them to `data` folder according to `Data Folder Structure`.
+* Download VGG16 pretrained model, use `mxnet/tools/caffe_converter` to convert it,
+ rename to `vgg16-symbol.json` and `vgg16-0001.params` and place it in `model` folder.
+ `model` folder will be used to place model checkpoints along the training process.
+* Start training by running `python -m tools.train_rcnn --proposal ss` to use the selective search proposal.
+* Start testing by running `python -m tools.test_rcnn --proposal ss`.
-optional arguments:
- -h, --help show this help message and exit
- --image_set IMAGE_SET
- can be test
- --year YEAR can be 2007, 2010, 2012
- --root_path ROOT_PATH
- output data folder
- --devkit_path DEVKIT_PATH
- VOCdevkit path
- --prefix PREFIX new model prefix
- --epoch EPOCH epoch of pretrained model
- --gpu GPU_ID GPU device to test with
-```
+## Information
+* Download link to trained model
+ Baidu Yun: http://pan.baidu.com/s/1boRhGvH (ixiw) or Dropbox: https://www.dropbox.com/s/jrr83q0ai2ckltq/final-0000.params.tar.gz?dl=0
+* Download link to Pascal VOC and precomputed selective search proposals
-## Demonstration
-* If no training has been done, download reference model from Ross Girshick and use
-`mxnet/caffe/caffe_converter` to convert it to MXNet.
-```
-https://github.com/rbgirshick/fast-rcnn/blob/master/data/scripts/fetch_fast_rcnn_models.sh
-```
-* Run demo by `demo.py --gpu 0 --prefix path-to-model --epoch 0`, in which
-`path-to-model + '%4d' % epoch.params` will be the params file and
-`path-to-model + '-symbol.json'` will be the symbol json.
-* Demo can be run in cpu, modify `demo.py` accordingly.
-```
-usage: demo.py [-h] [--prefix PREFIX] [--epoch EPOCH] [--gpu GPU_ID]
+ ```
+ Pascal VOCdevkit
+ http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
+ http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
+ http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar
+ selective_search_data (by Ross Girshick)
+ Download link accessible at https://github.com/rbgirshick/fast-rcnn/blob/master/data/scripts/fetch_selective_search_data.sh
+ ```
-Demonstrate a Fast R-CNN network
+* Data Folder Structure (create a `data` folder if there is none)
-optional arguments:
- -h, --help show this help message and exit
- --prefix PREFIX new model prefix
- --epoch EPOCH epoch of pretrained model
- --gpu GPU_ID GPU device to test with
-```
+ ```
+ VOCdevkit
+ -- VOC + year (JPEG images and annotations)
+ -- results (will be created by evaluation)
+ ---- VOC + year
+ ------ main
+ -------- comp4_det_val_aeroplane.txt
+ selective_search_data
+ rpn_data (will be created by rpn)
+ cache (will be created by imdb)
+ ```
## Disclaimer
This repository used code from [MXNet](https://github.com/dmlc/mxnet),
@@ -142,3 +83,12 @@ This repository used code from [MXNet](https://github.com/dmlc/mxnet),
[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/),
[ImageNet](http://image-net.org/). Model comes from
[VGG16](http://www.robots.ox.ac.uk/~vgg/research/very_deep/).
+
+## References
+1. Tianqi Chen, Mu Li, Yutian Li, Min Lin, Naiyan Wang, Minjie Wang, Tianjun Xiao, Bing Xu, Chiyuan Zhang, and Zheng Zhang. MXNet: A Flexible and Efficient Machine Learning Library for Heterogeneous Distributed Systems. In Neural Information Processing Systems, Workshop on Machine Learning Systems, 2015
+2. Ross Girshick. "Fast R-CNN." In Proceedings of the IEEE International Conference on Computer Vision, 2015.
+3. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. "Faster R-CNN: Towards real-time object detection with region proposal networks." In Advances in Neural Information Processing Systems, 2015.
+4. Yangqing Jia, Evan Shelhamer, Jeff Donahue, Sergey Karayev, Jonathan Long, Ross Girshick, Sergio Guadarrama, and Trevor Darrell. "Caffe: Convolutional architecture for fast feature embedding." In Proceedings of the ACM International Conference on Multimedia, 2014.
+5. Mark Everingham, Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman. "The pascal visual object classes (voc) challenge." International journal of computer vision 88, no. 2 (2010): 303-338.
+6. Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. "ImageNet: A large-scale hierarchical image database." In Computer Vision and Pattern Recognition, IEEE Conference on, 2009.
+7. Karen Simonyan, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).
diff --git a/example/rcnn/demo.py b/example/rcnn/demo.py
index 768b1a7fe15a..fb110849663b 100644
--- a/example/rcnn/demo.py
+++ b/example/rcnn/demo.py
@@ -1,25 +1,73 @@
import argparse
-import mxnet as mx
import os
-from tools.load_model import load_param
-from rcnn.symbol import get_symbol_vgg_test
+import numpy as np
+import cv2
+
+import mxnet as mx
+
+from helper.processing.image_processing import resize, transform
+from helper.processing.nms import nms
+from rcnn.config import config
from rcnn.detector import Detector
-from tools.demo_net import demo_net
+from rcnn.symbol import get_vgg_test
+from rcnn.tester import vis_all_detection
+from utils.load_model import load_param
def get_net(prefix, epoch, ctx):
args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
- sym = get_symbol_vgg_test()
+ sym = get_vgg_test()
detector = Detector(sym, ctx, args, auxs)
return detector
+CLASSES = ('__background__',
+ 'aeroplane', 'bicycle', 'bird', 'boat',
+ 'bottle', 'bus', 'car', 'cat', 'chair',
+ 'cow', 'diningtable', 'dog', 'horse',
+ 'motorbike', 'person', 'pottedplant',
+ 'sheep', 'sofa', 'train', 'tvmonitor')
+
+
+def demo_net(detector, image_name):
+ """
+ wrapper for detector
+ :param detector: Detector
+ :param image_name: image name
+ :return: None
+ """
+ config.TEST.HAS_RPN = True
+ assert os.path.exists(image_name), image_name + ' not found'
+ im = cv2.imread(image_name)
+ im_array, im_scale = resize(im, config.SCALES[0], config.MAX_SIZE)
+ im_array = transform(im_array, config.PIXEL_MEANS)
+ im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scale]], dtype=np.float32)
+
+ scores, boxes = detector.im_detect(im_array, im_info)
+
+ all_boxes = [[] for _ in CLASSES]
+ CONF_THRESH = 0.8
+ NMS_THRESH = 0.3
+ for cls in CLASSES:
+ cls_ind = CLASSES.index(cls)
+ cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
+ cls_scores = scores[:, cls_ind]
+ keep = np.where(cls_scores >= CONF_THRESH)[0]
+ cls_boxes = cls_boxes[keep, :]
+ cls_scores = cls_scores[keep]
+ dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
+ keep = nms(dets.astype(np.float32), NMS_THRESH)
+ all_boxes[cls_ind] = dets[keep, :]
+
+ boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
+ vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
+
+
def parse_args():
- parser = argparse.ArgumentParser(description='Demonstrate a Fast R-CNN network')
- parser.add_argument('--prefix', dest='prefix', help='new model prefix',
- default=os.path.join(os.getcwd(), 'model', 'frcnn'), type=str)
- parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
- default=9, type=int)
+ parser = argparse.ArgumentParser(description='Demonstrate a Faster R-CNN network')
+ parser.add_argument('--image', dest='image', help='custom image', type=str)
+ parser.add_argument('--prefix', dest='prefix', help='saved model prefix', type=str)
+ parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model', type=int)
parser.add_argument('--gpu', dest='gpu_id', help='GPU device to test with',
default=0, type=int)
args = parser.parse_args()
@@ -29,5 +77,5 @@ def parse_args():
args = parse_args()
ctx = mx.gpu(args.gpu_id)
detector = get_net(args.prefix, args.epoch, ctx)
- demo_net(detector, os.path.join(os.getcwd(), 'data', 'demo', '000004'))
- demo_net(detector, os.path.join(os.getcwd(), 'data', 'demo', '001551'))
+ demo_net(detector, args.image)
+ demo_net(detector, args.image)
diff --git a/example/rcnn/helper/dataset/imdb.py b/example/rcnn/helper/dataset/imdb.py
index 3c431ff5bfd1..8f53ce5412e6 100644
--- a/example/rcnn/helper/dataset/imdb.py
+++ b/example/rcnn/helper/dataset/imdb.py
@@ -32,7 +32,7 @@ def roidb(self, gt_roidb):
def create_roidb_from_box_list(self, box_list, gt_roidb):
"""
given ground truth, prepare roidb
- :param box_list: [image_index][box_index][x1, x2, y1, y2]
+ :param box_list: [image_index] ndarray of [box_index][x1, x2, y1, y2]
:param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
:return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
"""
@@ -43,7 +43,7 @@ def create_roidb_from_box_list(self, box_list, gt_roidb):
num_boxes = boxes.shape[0]
overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
- if gt_roidb is not None:
+ if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
gt_boxes = gt_roidb[i]['boxes']
gt_classes = gt_roidb[i]['gt_classes']
# n boxes and k gt_boxes => n * k overlap
@@ -106,5 +106,86 @@ def append_flipped_images(self, roidb):
self.image_set_index *= 2
return roidb
+ def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None, area='all', limit=None):
+ """
+ evaluate detection proposal recall metrics
+ record max overlap value for each gt box; return vector of overlap values
+ :param roidb: used to evaluate
+ :param candidate_boxes: if not given, use roidb's non-gt boxes
+ :param thresholds: array-like recall threshold
+ :param area: index in area ranges
+ :param limit: limit of bounding box evaluated
+ :return: None
+ ar: average recall, recalls: vector recalls at each IoU overlap threshold
+ thresholds: vector of IoU overlap threshold, gt_overlaps: vector of all ground-truth overlaps
+ """
+ areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
+ '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
+ area_ranges = [[0**2, 1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2],
+ [96**2, 128**2], [128**2, 256**2], [256**2, 512**2], [512**2, 1e5**2]]
+        assert area in areas, 'unknown area range: {}'.format(area)
+ area_range = area_ranges[areas[area]]
+ gt_overlaps = np.zeros(0)
+ num_pos = 0
+ for i in range(self.num_images):
+ # check for max_overlaps == 1 avoids including crowd annotations
+ max_gt_overlaps = roidb[i]['gt_overlaps'].toarray().max(axis=1)
+ gt_inds = np.where((roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0]
+ gt_boxes = roidb[i]['boxes'][gt_inds, :]
+ gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
+ valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
+ gt_boxes = gt_boxes[valid_gt_inds, :]
+ num_pos += len(valid_gt_inds)
+
+ if candidate_boxes is None:
+ # default is use the non-gt boxes from roidb
+ non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
+ boxes = roidb[i]['boxes'][non_gt_inds, :]
+ else:
+ boxes = candidate_boxes[i]
+ if boxes.shape[0] == 0:
+ continue
+ if limit is not None and boxes.shape[0] > limit:
+ boxes = boxes[:limit, :]
+
+ overlaps = bbox_overlaps(boxes.astype(np.float), gt_boxes.astype(np.float))
+
+ _gt_overlaps = np.zeros((gt_boxes.shape[0]))
+ for j in range(gt_boxes.shape[0]):
+ # find which proposal maximally covers each gt box
+ argmax_overlaps = overlaps.argmax(axis=0)
+ # get the IoU amount of coverage for each gt box
+ max_overlaps = overlaps.max(axis=0)
+ # find which gt box is covered by most IoU
+ gt_ind = max_overlaps.argmax()
+ gt_ovr = max_overlaps.max()
+ assert (gt_ovr >= 0)
+ # find the proposal box that covers the best covered gt box
+ box_ind = argmax_overlaps[gt_ind]
+ # record the IoU coverage of this gt box
+ _gt_overlaps[j] = overlaps[box_ind, gt_ind]
+ assert (_gt_overlaps[j] == gt_ovr)
+ # mark the proposal box and the gt box as used
+ overlaps[box_ind, :] = -1
+ overlaps[:, gt_ind] = -1
+ # append recorded IoU coverage level
+ gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
+
+ gt_overlaps = np.sort(gt_overlaps)
+ if thresholds is None:
+ step = 0.05
+ thresholds = np.arange(0.5, 0.95 + 1e-5, step)
+ recalls = np.zeros_like(thresholds)
+
+ # compute recall for each IoU threshold
+ for i, t in enumerate(thresholds):
+ recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
+ ar = recalls.mean()
+
+ # print results
+ print 'average recall: {:.3f}'.format(ar)
+ for threshold, recall in zip(thresholds, recalls):
+ print 'recall @{:.2f}: {:.3f}'.format(threshold, recall)
+
def evaluate_detections(self, detections):
raise NotImplementedError
diff --git a/example/rcnn/helper/dataset/pascal_voc.py b/example/rcnn/helper/dataset/pascal_voc.py
index 7d2356ba3eef..9ae27f4b91e5 100644
--- a/example/rcnn/helper/dataset/pascal_voc.py
+++ b/example/rcnn/helper/dataset/pascal_voc.py
@@ -13,6 +13,7 @@
import cPickle
from imdb import IMDB
from voc_eval import voc_eval
+from helper.processing.bbox_process import unique_boxes, filter_small_boxes
class PascalVOC(IMDB):
@@ -43,7 +44,8 @@ def __init__(self, image_set, year, root_path, devkit_path):
self.num_images = len(self.image_set_index)
self.config = {'comp_id': 'comp4',
- 'use_diff': True}
+ 'use_diff': False,
+ 'min_size': 2}
@property
def cache_path(self):
@@ -102,17 +104,14 @@ def load_pascal_annotation(self, index):
:param index: index of a specific image
:return: record['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
"""
- import xml.dom.minidom as minidom
+ import xml.etree.ElementTree as ET
filename = os.path.join(self.data_path, 'Annotations', index + '.xml')
- # print 'Loading: {}'.format(filename)
- def get_data_from_tag(node, tag):
- return node.getElementsByTagName(tag)[0].childNodes[0].data
-
- with open(filename) as f:
- data = minidom.parseString(f.read())
-
- objs = data.getElementsByTagName('object')
+ tree = ET.parse(filename)
+ objs = tree.findall('object')
+ if not self.config['use_diff']:
+ non_diff_objs = [obj for obj in objs if int(obj.find('difficult').text) == 0]
+ objs = non_diff_objs
num_objs = len(objs)
boxes = np.zeros((num_objs, 4), dtype=np.uint16)
@@ -122,13 +121,13 @@ def get_data_from_tag(node, tag):
class_to_index = dict(zip(self.classes, range(self.num_classes)))
# Load object bounding boxes into a data frame.
for ix, obj in enumerate(objs):
+ bbox = obj.find('bndbox')
# Make pixel indexes 0-based
- x1 = float(get_data_from_tag(obj, 'xmin')) - 1
- y1 = float(get_data_from_tag(obj, 'ymin')) - 1
- x2 = float(get_data_from_tag(obj, 'xmax')) - 1
- y2 = float(get_data_from_tag(obj, 'ymax')) - 1
- cls = class_to_index[
- str(get_data_from_tag(obj, "name")).lower().strip()]
+ x1 = float(bbox.find('xmin').text) - 1
+ y1 = float(bbox.find('ymin').text) - 1
+ x2 = float(bbox.find('xmax').text) - 1
+ y2 = float(bbox.find('ymax').text) - 1
+ cls = class_to_index[obj.find('name').text.lower().strip()]
boxes[ix, :] = [x1, y1, x2, y2]
gt_classes[ix] = cls
overlaps[ix, cls] = 1.0
@@ -155,7 +154,12 @@ def load_selective_search_roidb(self, gt_roidb):
box_list = []
for i in range(raw_data.shape[0]):
- box_list.append(raw_data[i][:, (1, 0, 3, 2)] - 1) # pascal voc dataset starts from 1.
+ boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 # pascal voc dataset starts from 1.
+ keep = unique_boxes(boxes)
+ boxes = boxes[keep, :]
+ keep = filter_small_boxes(boxes, self.config['min_size'])
+ boxes = boxes[keep, :]
+ box_list.append(boxes)
return self.create_roidb_from_box_list(box_list, gt_roidb)
@@ -183,6 +187,33 @@ def selective_search_roidb(self, gt_roidb):
return roidb
+ def load_rpn_roidb(self, gt_roidb):
+ """
+ turn rpn detection boxes into roidb
+ :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
+ :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped']
+ """
+ rpn_file = os.path.join(self.root_path, 'rpn_data', self.name + '_rpn.pkl')
+ print 'loading {}'.format(rpn_file)
+ assert os.path.exists(rpn_file), 'rpn data not found at {}'.format(rpn_file)
+ with open(rpn_file, 'rb') as f:
+ box_list = cPickle.load(f)
+ return self.create_roidb_from_box_list(box_list, gt_roidb)
+
+ def rpn_roidb(self, gt_roidb):
+ """
+ get rpn roidb and ground truth roidb
+ :param gt_roidb: ground truth roidb
+ :return: roidb of rpn (ground truth included)
+ """
+ if self.image_set != 'test':
+ rpn_roidb = self.load_rpn_roidb(gt_roidb)
+ roidb = IMDB.merge_roidbs(gt_roidb, rpn_roidb)
+ else:
+ print 'rpn database need not be used in test'
+ roidb = self.load_rpn_roidb(gt_roidb)
+ return roidb
+
def evaluate_detections(self, detections):
"""
top level evaluations
diff --git a/example/rcnn/helper/dataset/voc_eval.py b/example/rcnn/helper/dataset/voc_eval.py
index 3b2c153c0de5..8975b619b708 100644
--- a/example/rcnn/helper/dataset/voc_eval.py
+++ b/example/rcnn/helper/dataset/voc_eval.py
@@ -95,7 +95,6 @@ def voc_eval(detpath, annopath, imageset_file, classname, cache_dir, ovthresh=0.
else:
with open(cache_file, 'r') as f:
recs = cPickle.load(f)
- print 'ground truth annotations loaded from cache file {}'.format(cache_file)
# extract objects in :param classname:
class_recs = {}
diff --git a/example/rcnn/helper/processing/bbox_process.py b/example/rcnn/helper/processing/bbox_process.py
new file mode 100644
index 000000000000..60d8a7af86bd
--- /dev/null
+++ b/example/rcnn/helper/processing/bbox_process.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+
+def unique_boxes(boxes, scale=1.0):
+ """ return indices of unique boxes """
+ v = np.array([1, 1e3, 1e6, 1e9])
+ hashes = np.round(boxes * scale).dot(v)
+ _, index = np.unique(hashes, return_index=True)
+ return np.sort(index)
+
+
+def filter_small_boxes(boxes, min_size):
+ w = boxes[:, 2] - boxes[:, 0]
+ h = boxes[:, 3] - boxes[:, 1]
+    keep = np.where((w >= min_size) & (h >= min_size))[0]
+ return keep
diff --git a/example/rcnn/helper/processing/bbox_regression.py b/example/rcnn/helper/processing/bbox_regression.py
index 7e58324fc541..840a96cc5ec5 100644
--- a/example/rcnn/helper/processing/bbox_regression.py
+++ b/example/rcnn/helper/processing/bbox_regression.py
@@ -5,6 +5,7 @@
import numpy as np
from rcnn.config import config
+from bbox_transform import bbox_transform
def bbox_overlaps(boxes, query_boxes):
@@ -43,6 +44,8 @@ def compute_bbox_regression_targets(rois, overlaps, labels):
# Indices of ground-truth ROIs
gt_inds = np.where(overlaps == 1)[0]
+ if len(gt_inds) == 0:
+ print 'something wrong : zero ground truth rois'
# Indices of examples for which we try to make predictions
ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]
@@ -55,27 +58,9 @@ def compute_bbox_regression_targets(rois, overlaps, labels):
gt_rois = rois[gt_inds[gt_assignment], :]
ex_rois = rois[ex_inds, :]
- ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + config['EPS']
- ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + config['EPS']
- ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
- ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
-
- gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + config['EPS']
- gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + config['EPS']
- gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
- gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
-
- targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
- targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
- targets_dw = np.log(gt_widths / ex_widths)
- targets_dh = np.log(gt_heights / ex_heights)
-
targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
targets[ex_inds, 0] = labels[ex_inds]
- targets[ex_inds, 1] = targets_dx
- targets[ex_inds, 2] = targets_dy
- targets[ex_inds, 3] = targets_dw
- targets[ex_inds, 4] = targets_dh
+ targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
return targets
diff --git a/example/rcnn/helper/processing/bbox_transform.py b/example/rcnn/helper/processing/bbox_transform.py
index ba5187f2ab0c..0757a70eedd7 100644
--- a/example/rcnn/helper/processing/bbox_transform.py
+++ b/example/rcnn/helper/processing/bbox_transform.py
@@ -1,10 +1,37 @@
"""
-This file has functions about bounding box post processing.
+This file has functions about bounding box processing.
"""
import numpy as np
+def bbox_transform(ex_rois, gt_rois):
+ """
+ compute bounding box regression targets from ex_rois to gt_rois
+ :param ex_rois: [N, 4]
+ :param gt_rois: [N, 4]
+ :return: [N, 4]
+ """
+ ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
+ ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
+ ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0)
+ ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0)
+
+ gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
+ gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
+ gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0)
+ gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0)
+
+ targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14)
+ targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14)
+ targets_dw = np.log(gt_widths / ex_widths)
+ targets_dh = np.log(gt_heights / ex_heights)
+
+ targets = np.vstack(
+ (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
+ return targets
+
+
def bbox_pred(boxes, box_deltas):
"""
Transform the set of class-agnostic boxes into class-specific boxes
@@ -17,10 +44,10 @@ def bbox_pred(boxes, box_deltas):
return np.zeros((0, box_deltas.shape[1]))
boxes = boxes.astype(np.float, copy=False)
- widths = boxes[:, 2] - boxes[:, 0] + 1e-14
- heights = boxes[:, 3] - boxes[:, 1] + 1e-14
- ctr_x = boxes[:, 0] + 0.5 * widths
- ctr_y = boxes[:, 1] + 0.5 * heights
+ widths = boxes[:, 2] - boxes[:, 0] + 1.0
+ heights = boxes[:, 3] - boxes[:, 1] + 1.0
+ ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
+ ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)
dx = box_deltas[:, 0::4]
dy = box_deltas[:, 1::4]
@@ -34,13 +61,13 @@ def bbox_pred(boxes, box_deltas):
pred_boxes = np.zeros(box_deltas.shape)
# x1
- pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
+ pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)
# y1
- pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
+ pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)
# x2
- pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
+ pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)
# y2
- pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
+ pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)
return pred_boxes
@@ -53,11 +80,11 @@ def clip_boxes(boxes, im_shape):
:return: [N, 4* num_classes]
"""
# x1 >= 0
- boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
+ boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
- boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)
+ boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
- boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)
+ boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
- boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)
+ boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
diff --git a/example/rcnn/helper/processing/generate_anchor.py b/example/rcnn/helper/processing/generate_anchor.py
new file mode 100644
index 000000000000..8996a3aaab48
--- /dev/null
+++ b/example/rcnn/helper/processing/generate_anchor.py
@@ -0,0 +1,72 @@
+"""
+Generate base anchors on index 0
+"""
+
+import numpy as np
+
+
+def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
+ scales=2 ** np.arange(3, 6)):
+ """
+ Generate anchor (reference) windows by enumerating aspect ratios X
+ scales wrt a reference (0, 0, 15, 15) window.
+ """
+
+ base_anchor = np.array([1, 1, base_size, base_size]) - 1
+ ratio_anchors = _ratio_enum(base_anchor, ratios)
+ anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
+ for i in xrange(ratio_anchors.shape[0])])
+ return anchors
+
+
+def _whctrs(anchor):
+ """
+ Return width, height, x center, and y center for an anchor (window).
+ """
+
+ w = anchor[2] - anchor[0] + 1
+ h = anchor[3] - anchor[1] + 1
+ x_ctr = anchor[0] + 0.5 * (w - 1)
+ y_ctr = anchor[1] + 0.5 * (h - 1)
+ return w, h, x_ctr, y_ctr
+
+
+def _mkanchors(ws, hs, x_ctr, y_ctr):
+ """
+ Given a vector of widths (ws) and heights (hs) around a center
+ (x_ctr, y_ctr), output a set of anchors (windows).
+ """
+
+ ws = ws[:, np.newaxis]
+ hs = hs[:, np.newaxis]
+ anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
+ y_ctr - 0.5 * (hs - 1),
+ x_ctr + 0.5 * (ws - 1),
+ y_ctr + 0.5 * (hs - 1)))
+ return anchors
+
+
+def _ratio_enum(anchor, ratios):
+ """
+ Enumerate a set of anchors for each aspect ratio wrt an anchor.
+ """
+
+ w, h, x_ctr, y_ctr = _whctrs(anchor)
+ size = w * h
+ size_ratios = size / ratios
+ ws = np.round(np.sqrt(size_ratios))
+ hs = np.round(ws * ratios)
+ anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
+ return anchors
+
+
+def _scale_enum(anchor, scales):
+ """
+ Enumerate a set of anchors for each scale wrt an anchor.
+ """
+
+ w, h, x_ctr, y_ctr = _whctrs(anchor)
+ ws = w * scales
+ hs = h * scales
+ anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
+ return anchors
diff --git a/example/rcnn/helper/processing/roidb.py b/example/rcnn/helper/processing/roidb.py
index d68ddb94290c..7ad1b26c182f 100644
--- a/example/rcnn/helper/processing/roidb.py
+++ b/example/rcnn/helper/processing/roidb.py
@@ -4,6 +4,7 @@
extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets']
"""
+import cv2
import numpy as np
from bbox_regression import compute_bbox_regression_targets
@@ -17,8 +18,13 @@ def prepare_roidb(imdb, roidb):
:param roidb: roidb
:return: None
"""
+ print 'prepare roidb'
for i in range(len(roidb)): # image_index
roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i])
+ if config.TRAIN.ASPECT_GROUPING:
+ size = cv2.imread(roidb[i]['image']).shape
+ roidb[i]['height'] = size[0]
+ roidb[i]['width'] = size[1]
gt_overlaps = roidb[i]['gt_overlaps'].toarray()
max_overlaps = gt_overlaps.max(axis=1)
max_classes = gt_overlaps.argmax(axis=1)
@@ -51,22 +57,27 @@ def add_bbox_regression_targets(roidb):
max_classes = roidb[im_i]['max_classes']
roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes)
- # compute mean, std values
- class_counts = np.zeros((num_classes, 1)) + config.EPS
- sums = np.zeros((num_classes, 4))
- squared_sums = np.zeros((num_classes, 4))
- for im_i in range(num_images):
- targets = roidb[im_i]['bbox_targets']
- for cls in range(1, num_classes):
- cls_indexes = np.where(targets[:, 0] == cls)[0]
- if cls_indexes.size > 0:
- class_counts[cls] += cls_indexes.size
- sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
- squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)
+ if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
+ # use fixed / precomputed means and stds instead of empirical values
+ means = np.tile(np.array(config.TRAIN.BBOX_MEANS), (num_classes, 1))
+ stds = np.tile(np.array(config.TRAIN.BBOX_STDS), (num_classes, 1))
+ else:
+ # compute mean, std values
+ class_counts = np.zeros((num_classes, 1)) + config.EPS
+ sums = np.zeros((num_classes, 4))
+ squared_sums = np.zeros((num_classes, 4))
+ for im_i in range(num_images):
+ targets = roidb[im_i]['bbox_targets']
+ for cls in range(1, num_classes):
+ cls_indexes = np.where(targets[:, 0] == cls)[0]
+ if cls_indexes.size > 0:
+ class_counts[cls] += cls_indexes.size
+ sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
+ squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)
- means = sums / class_counts
- # var(x) = E(x^2) - E(x)^2
- stds = np.sqrt(squared_sums / class_counts - means ** 2)
+ means = sums / class_counts
+ # var(x) = E(x^2) - E(x)^2
+ stds = np.sqrt(squared_sums / class_counts - means ** 2)
# normalized targets
for im_i in range(num_images):
diff --git a/example/rcnn/rcnn/config.py b/example/rcnn/rcnn/config.py
index 9ae69cb8adc1..fb9826c1b186 100644
--- a/example/rcnn/rcnn/config.py
+++ b/example/rcnn/rcnn/config.py
@@ -3,26 +3,65 @@
config = edict()
+# image processing config
config.EPS = 1e-14
config.PIXEL_MEANS = np.array([[[123.68, 116.779, 103.939]]])
+config.SCALES = (600, ) # single scale training and testing
+config.MAX_SIZE = 1000
+
+# nms config
+config.USE_GPU_NMS = True
+config.GPU_ID = 0
config.TRAIN = edict()
-config.TRAIN.SCALES = (600, )
-config.TRAIN.MAX_SIZE = 1000
+# R-CNN and RPN
+config.TRAIN.FINETUNE = False
+config.TRAIN.BATCH_SIZE = 128 # used in grad_scale
+# R-CNN
+config.TRAIN.HAS_RPN = False
+config.TRAIN.ASPECT_GROUPING = True
config.TRAIN.BATCH_IMAGES = 2
-config.TRAIN.BATCH_SIZE = 128
config.TRAIN.FG_FRACTION = 0.25
config.TRAIN.FG_THRESH = 0.5
config.TRAIN.BG_THRESH_HI = 0.5
config.TRAIN.BG_THRESH_LO = 0.1
+# R-CNN bounding box regression
config.TRAIN.BBOX_REGRESSION_THRESH = 0.5
config.TRAIN.BBOX_INSIDE_WEIGHTS = np.array([1.0, 1.0, 1.0, 1.0])
+# RPN anchor loader
+config.TRAIN.RPN_BATCH_SIZE = 256
+config.TRAIN.RPN_FG_FRACTION = 0.5
+config.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
+config.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
+config.TRAIN.RPN_CLOBBER_POSITIVES = False
+config.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
+config.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
+
+# used for end2end training
+# RPN proposal
+config.TRAIN.RPN_NMS_THRESH = 0.7
+config.TRAIN.RPN_PRE_NMS_TOP_N = 12000
+config.TRAIN.RPN_POST_NMS_TOP_N = 6000
+config.TRAIN.RPN_MIN_SIZE = 16
+# approximate bounding box regression
+config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED = False
+config.TRAIN.BBOX_MEANS = (0.0, 0.0, 0.0, 0.0)
+config.TRAIN.BBOX_STDS = (0.1, 0.1, 0.2, 0.2)
+
config.TEST = edict()
-config.TEST.SCALES = (600, )
+# R-CNN testing
+config.TEST.HAS_RPN = False
+config.TEST.BATCH_IMAGES = 1
config.TEST.NMS = 0.3
config.TEST.DEDUP_BOXES = 1. / 16.
+
+# RPN proposal
+config.TEST.RPN_NMS_THRESH = 0.7
+config.TEST.RPN_PRE_NMS_TOP_N = 6000
+config.TEST.RPN_POST_NMS_TOP_N = 300
+config.TEST.RPN_MIN_SIZE = 16
diff --git a/example/rcnn/rcnn/data_iter.py b/example/rcnn/rcnn/data_iter.py
index 69d9d64a3b2b..765334b2090f 100644
--- a/example/rcnn/rcnn/data_iter.py
+++ b/example/rcnn/rcnn/data_iter.py
@@ -1,12 +1,10 @@
import mxnet as mx
import numpy as np
import minibatch
-from mxnet.executor_manager import _split_input_slice
-from helper.processing.image_processing import tensor_vstack
class ROIIter(mx.io.DataIter):
- def __init__(self, roidb, ctx, batch_size=2, shuffle=False, mode='train', work_load_list=None):
+ def __init__(self, roidb, batch_size=2, shuffle=False, mode='train'):
"""
This Iter will provide roi data to Fast R-CNN network
:param roidb: must be preprocessed
@@ -17,11 +15,9 @@ def __init__(self, roidb, ctx, batch_size=2, shuffle=False, mode='train', work_l
super(ROIIter, self).__init__()
self.roidb = roidb
- self.ctx = ctx
self.batch_size = batch_size
self.shuffle = shuffle
self.mode = mode
- self.work_load_list = work_load_list
if self.mode != 'train':
assert self.batch_size == 1
@@ -34,17 +30,16 @@ def __init__(self, roidb, ctx, batch_size=2, shuffle=False, mode='train', work_l
self.data = None
self.label = None
self.get_batch()
+ self.data_name = self.data.keys()
+ self.label_name = self.label.keys()
@property
def provide_data(self):
- return [('data', self.data[0].shape), ('rois', self.data[1].shape)]
+ return [(k, v.shape) for k, v in self.data.items()]
@property
def provide_label(self):
- return [('cls_prob_label', self.label[0].shape),
- ('bbox_loss_target', self.label[1].shape),
- ('bbox_loss_inside_weight', self.label[2].shape),
- ('bbox_loss_outside_weight', self.label[3].shape)]
+ return [(k, v.shape) for k, v in self.label.items()]
def reset(self):
self.cur = 0
@@ -58,13 +53,8 @@ def next(self):
if self.iter_next():
self.get_batch()
self.cur += self.batch_size
- if self.mode == 'train':
- return mx.io.DataBatch(data=self.data, label=self.label,
- pad=self.getpad(), index=self.getindex(),
- provide_data=self.provide_data, provide_label=self.provide_label)
- else:
- return mx.io.DataBatch(data=self.data, label=self.label,
- pad=self.getpad(), index=self.getindex())
+ return mx.io.DataBatch(data=self.data, label=self.label,
+ pad=self.getpad(), index=self.getindex())
else:
raise StopIteration
@@ -72,17 +62,17 @@ def getindex(self):
return self.cur / self.batch_size
def getpad(self):
- if self.cur + self.batch_size > self.size:
- return self.cur + self.batch_size - self.size
- else:
- return 0
+        return (self.batch_size - self.size % self.batch_size) % self.batch_size
def get_batch(self):
if self.mode == 'train':
self.batch = self._get_train_batch()
- self.data = [mx.nd.array(self.batch['data']), mx.nd.array(self.batch['rois'])]
- self.label = [mx.nd.array(self.batch['labels']), mx.nd.array(self.batch['bbox_targets']),
- mx.nd.array(self.batch['bbox_inside_weights']), mx.nd.array(self.batch['bbox_outside_weights'])]
+ self.data = {'data': self.batch['data'],
+ 'rois': self.batch['rois']}
+ self.label = {'cls_prob_label': self.batch['labels'],
+ 'bbox_loss_target': self.batch['bbox_targets'],
+ 'bbox_loss_inside_weight': self.batch['bbox_inside_weights'],
+ 'bbox_loss_outside_weight': self.batch['bbox_outside_weights']}
else:
self.batch = self._get_test_batch()
self.data = {'data': self.batch['data'],
@@ -94,34 +84,11 @@ def _get_train_batch(self):
utilize minibatch sampling, e.g. 2 images and 64 rois per image
:return: training batch (e.g. 128 samples)
"""
- work_load_list = self.work_load_list
- ctx = self.ctx
- if work_load_list is None:
- work_load_list = [1] * len(ctx)
- assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
- "Invalid settings for work load. "
- slices = _split_input_slice(self.batch_size, work_load_list)
-
cur_from = self.cur
- cur_to = cur_from + self.batch_size
- if cur_to <= self.size:
- roidb = [self.roidb[i] for i in range(cur_from, cur_to)]
- else:
- pad = cur_to - self.size
- roidb = self.roidb[cur_from:] + self.roidb[:pad]
-
- batch_list = []
- for islice in slices:
- num_im = islice.stop - islice.start
- iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
- batch = minibatch.get_minibatch(iroidb, self.num_classes, self.ctx)
- batch_list.append(batch)
-
- all_batch = dict()
- for key in batch_list[0].keys():
- all_batch[key] = tensor_vstack([batch[key] for batch in batch_list])
-
- return all_batch
+ cur_to = min(cur_from + self.batch_size, self.size)
+ roidb = [self.roidb[i] for i in range(cur_from, cur_to)]
+ batch = minibatch.get_minibatch(roidb, self.num_classes)
+ return batch
def _get_test_batch(self):
"""
diff --git a/example/rcnn/rcnn/detector.py b/example/rcnn/rcnn/detector.py
index cc9787d3fff4..8e424c973108 100644
--- a/example/rcnn/rcnn/detector.py
+++ b/example/rcnn/rcnn/detector.py
@@ -16,15 +16,16 @@ def __init__(self, symbol, ctx=None,
self.aux_params = aux_params
self.executor = None
- def im_detect(self, im_array, roi_array):
+ def im_detect(self, im_array, im_info=None, roi_array=None):
"""
perform detection of designated im, box, must follow minibatch.get_testbatch format
:param im_array: numpy.ndarray [b c h w]
+ :param im_info: numpy.ndarray [b 3]
:param roi_array: numpy.ndarray [roi_num 5]
:return: scores, pred_boxes
"""
# remove duplicate feature rois
- if config.TEST.DEDUP_BOXES > 0:
+ if config.TEST.DEDUP_BOXES > 0 and not config.TEST.HAS_RPN:
roi_array = roi_array
# rank roi by v .* (b, dx, dy, dw, dh)
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
@@ -33,27 +34,44 @@ def im_detect(self, im_array, roi_array):
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
roi_array = roi_array[index, :]
- self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
- self.arg_params['rois'] = mx.nd.array(roi_array, self.ctx)
- arg_shapes, out_shapes, aux_shapes = \
- self.symbol.infer_shape(data=self.arg_params['data'].shape, rois=self.arg_params['rois'].shape)
+ # fill in data
+ if config.TEST.HAS_RPN:
+ self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
+ self.arg_params['im_info'] = mx.nd.array(im_info, self.ctx)
+ arg_shapes, out_shapes, aux_shapes = \
+ self.symbol.infer_shape(data=self.arg_params['data'].shape, im_info=self.arg_params['im_info'].shape)
+ else:
+ self.arg_params['data'] = mx.nd.array(im_array, self.ctx)
+ self.arg_params['rois'] = mx.nd.array(roi_array, self.ctx)
+ arg_shapes, out_shapes, aux_shapes = \
+ self.symbol.infer_shape(data=self.arg_params['data'].shape, rois=self.arg_params['rois'].shape)
+
+ # fill in label and aux
arg_shapes_dict = {name: shape for name, shape in zip(self.symbol.list_arguments(), arg_shapes)}
self.arg_params['cls_prob_label'] = mx.nd.zeros(arg_shapes_dict['cls_prob_label'], self.ctx)
-
aux_names = self.symbol.list_auxiliary_states()
self.aux_params = {k: mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}
+
+ # execute
self.executor = self.symbol.bind(self.ctx, self.arg_params, args_grad=None,
grad_req='null', aux_states=self.aux_params)
output_dict = {name: nd for name, nd in zip(self.symbol.list_outputs(), self.executor.outputs)}
-
self.executor.forward(is_train=False)
- scores = output_dict['cls_prob_output'].asnumpy()
- bbox_deltas = output_dict['bbox_pred_output'].asnumpy()
- pred_boxes = bbox_pred(roi_array[:, 1:], bbox_deltas)
+ # save output
+ scores = output_dict['cls_prob_reshape_output'].asnumpy()[0]
+ bbox_deltas = output_dict['bbox_pred_reshape_output'].asnumpy()[0]
+ if config.TEST.HAS_RPN:
+ rois = output_dict['rois_output'].asnumpy()
+ rois = rois[:, 1:].copy() # scale back
+ else:
+ rois = roi_array[:, 1:]
+
+ # post processing
+ pred_boxes = bbox_pred(rois, bbox_deltas)
pred_boxes = clip_boxes(pred_boxes, im_array[0].shape[-2:])
- if config.TEST.DEDUP_BOXES > 0:
+ if config.TEST.DEDUP_BOXES > 0 and not config.TEST.HAS_RPN:
# map back to original
scores = scores[inv_index, :]
pred_boxes = pred_boxes[inv_index, :]
diff --git a/example/rcnn/rcnn/loader.py b/example/rcnn/rcnn/loader.py
new file mode 100644
index 000000000000..cea0900245a3
--- /dev/null
+++ b/example/rcnn/rcnn/loader.py
@@ -0,0 +1,298 @@
+import mxnet as mx
+import numpy as np
+import minibatch
+from config import config
+from mxnet.executor_manager import _split_input_slice
+from helper.processing.image_processing import tensor_vstack
+
+
+class ROIIter(mx.io.DataIter):
+ def __init__(self, roidb, batch_size=2, shuffle=False, mode='train', ctx=None, work_load_list=None):
+ """
+ This Iter will provide roi data to Fast R-CNN network
+ :param roidb: must be preprocessed
+ :param batch_size: must divide BATCH_SIZE(128)
+ :param shuffle: bool
+ :param mode: control returned info
+ :param ctx: list of contexts
+ :param work_load_list: list of work load
+ :return: ROIIter
+ """
+ super(ROIIter, self).__init__()
+
+ self.roidb = roidb
+ self.batch_size = batch_size
+ self.shuffle = shuffle
+ self.mode = mode
+ self.ctx = ctx
+ if self.ctx is None:
+ self.ctx = [mx.cpu()]
+ self.work_load_list = work_load_list
+
+ self.cur = 0
+ self.size = len(roidb)
+ self.index = np.arange(self.size)
+ self.num_classes = self.roidb[0]['gt_overlaps'].shape[1]
+ self.reset()
+
+ self.batch = None
+ self.data = None
+ self.label = None
+ self.get_batch()
+ self.data_name = ['data', 'rois']
+ self.label_name = ['label', 'bbox_target', 'bbox_inside_weight', 'bbox_outside_weight']
+
+ @property
+ def provide_data(self):
+ if self.mode == 'train':
+ return [('data', self.data[0].shape), ('rois', self.data[1].shape)]
+ else:
+ return [(k, v.shape) for k, v in self.data.items()]
+
+ @property
+ def provide_label(self):
+ if self.mode == 'train':
+ return [('label', self.label[0].shape),
+ ('bbox_target', self.label[1].shape),
+ ('bbox_inside_weight', self.label[2].shape),
+ ('bbox_outside_weight', self.label[3].shape)]
+ else:
+            return [(k, v.shape) for k, v in self.label.items()]
+
+ def reset(self):
+ self.cur = 0
+ if self.shuffle:
+ if config.TRAIN.ASPECT_GROUPING:
+ widths = np.array([r['width'] for r in self.roidb])
+ heights = np.array([r['height'] for r in self.roidb])
+ horz = (widths >= heights)
+ vert = np.logical_not(horz)
+ horz_inds = np.where(horz)[0]
+ vert_inds = np.where(vert)[0]
+ inds = np.hstack((np.random.permutation(horz_inds), np.random.permutation(vert_inds)))
+ inds = np.reshape(inds, (-1, 2))
+ row_perm = np.random.permutation(np.arange(inds.shape[0]))
+ inds = np.reshape(inds[row_perm, :], (-1, ))
+ self.index = inds
+ else:
+ np.random.shuffle(self.index)
+
+ def iter_next(self):
+ return self.cur + self.batch_size <= self.size
+
+ def next(self):
+ if self.iter_next():
+ self.get_batch()
+ self.cur += self.batch_size
+ return mx.io.DataBatch(data=self.data, label=self.label,
+ pad=self.getpad(), index=self.getindex(),
+ provide_data=self.provide_data, provide_label=self.provide_label)
+ else:
+ raise StopIteration
+
+ def getindex(self):
+ return self.cur / self.batch_size
+
+ def getpad(self):
+ if self.cur + self.batch_size > self.size:
+ return self.cur + self.batch_size - self.size
+ else:
+ return 0
+
+ def get_batch(self):
+ cur_from = self.cur
+ cur_to = min(cur_from + self.batch_size, self.size)
+ roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]
+ if self.mode == 'test':
+ self.data, self.label = minibatch.get_minibatch(roidb, self.num_classes, self.mode)
+ else:
+ work_load_list = self.work_load_list
+ ctx = self.ctx
+ if work_load_list is None:
+ work_load_list = [1] * len(ctx)
+ assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
+ "Invalid settings for work load. "
+ slices = _split_input_slice(self.batch_size, work_load_list)
+
+ data_list = []
+ label_list = []
+ for islice in slices:
+ iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
+ data, label = minibatch.get_minibatch(iroidb, self.num_classes, self.mode)
+ data_list.append(data)
+ label_list.append(label)
+
+ all_data = dict()
+ for key in data_list[0].keys():
+ all_data[key] = tensor_vstack([batch[key] for batch in data_list])
+
+ all_label = dict()
+ for key in label_list[0].keys():
+ all_label[key] = tensor_vstack([batch[key] for batch in label_list])
+
+ self.data = [mx.nd.array(all_data['data']),
+ mx.nd.array(all_data['rois'])]
+ self.label = [mx.nd.array(all_label['label']),
+ mx.nd.array(all_label['bbox_target']),
+ mx.nd.array(all_label['bbox_inside_weight']),
+ mx.nd.array(all_label['bbox_outside_weight'])]
+
+
+class AnchorLoader(mx.io.DataIter):
+ def __init__(self, feat_sym, roidb, batch_size=1, shuffle=False, mode='train', ctx=None, work_load_list=None,
+ feat_stride=16, anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2), allowed_border=0):
+ """
+ This Iter will provide roi data to Fast R-CNN network
+ :param feat_sym: to infer shape of assign_output
+ :param roidb: must be preprocessed
+ :param batch_size: must divide BATCH_SIZE(128)
+ :param shuffle: bool
+ :param mode: control returned info
+ :param ctx: list of contexts
+ :param work_load_list: list of work load
+ :return: AnchorLoader
+ """
+ super(AnchorLoader, self).__init__()
+
+ self.feat_sym = feat_sym
+ self.roidb = roidb
+ self.batch_size = batch_size
+ self.shuffle = shuffle
+ self.mode = mode
+ self.ctx = ctx
+ if self.ctx is None:
+ self.ctx = [mx.cpu()]
+ self.work_load_list = work_load_list
+ self.feat_stride = feat_stride
+ self.anchor_scales = anchor_scales
+ self.anchor_ratios = anchor_ratios
+ self.allowed_border = allowed_border
+
+ self.cur = 0
+ self.size = len(roidb)
+ self.index = np.arange(self.size)
+ self.num_classes = self.roidb[0]['gt_overlaps'].shape[1]
+ self.reset()
+
+ self.batch = None
+ self.data = None
+ self.label = None
+ self.get_batch()
+ self.data_name = ['data', 'im_info']
+ self.label_name = ['label', 'bbox_target', 'bbox_inside_weight', 'bbox_outside_weight']
+
+ @property
+ def provide_data(self):
+ if self.mode == 'train':
+ return [('data', self.data[0].shape)]
+ else:
+ return [(k, v.shape) for k, v in self.data.items()]
+
+ @property
+ def provide_label(self):
+ if self.mode == 'train':
+ return [('label', self.label[0].shape),
+ ('bbox_target', self.label[1].shape),
+ ('bbox_inside_weight', self.label[2].shape),
+ ('bbox_outside_weight', self.label[3].shape)]
+ else:
+            return [(k, v.shape) for k, v in self.label.items()]
+
+ def reset(self):
+ self.cur = 0
+ if self.shuffle:
+ if config.TRAIN.ASPECT_GROUPING:
+ widths = np.array([r['width'] for r in self.roidb])
+ heights = np.array([r['height'] for r in self.roidb])
+ horz = (widths >= heights)
+ vert = np.logical_not(horz)
+ horz_inds = np.where(horz)[0]
+ vert_inds = np.where(vert)[0]
+ inds = np.hstack((np.random.permutation(horz_inds), np.random.permutation(vert_inds)))
+ inds = np.reshape(inds, (-1, 2))
+ row_perm = np.random.permutation(np.arange(inds.shape[0]))
+ inds = np.reshape(inds[row_perm, :], (-1, ))
+ self.index = inds
+ else:
+ np.random.shuffle(self.index)
+
+ def iter_next(self):
+ return self.cur + self.batch_size <= self.size
+
+ def next(self):
+ if self.iter_next():
+ self.get_batch()
+ self.cur += self.batch_size
+ return mx.io.DataBatch(data=self.data, label=self.label,
+ pad=self.getpad(), index=self.getindex(),
+ provide_data=self.provide_data, provide_label=self.provide_label)
+ else:
+ raise StopIteration
+
+ def getindex(self):
+ return self.cur / self.batch_size
+
+ def getpad(self):
+ if self.cur + self.batch_size > self.size:
+ return self.cur + self.batch_size - self.size
+ else:
+ return 0
+
+ def get_batch(self):
+ cur_from = self.cur
+ cur_to = min(cur_from + self.batch_size, self.size)
+ roidb = [self.roidb[self.index[i]] for i in range(cur_from, cur_to)]
+ if self.mode == 'test':
+ self.data, self.label = minibatch.get_minibatch(roidb, self.num_classes, self.mode)
+ else:
+ work_load_list = self.work_load_list
+ ctx = self.ctx
+ if work_load_list is None:
+ work_load_list = [1] * len(ctx)
+ assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
+ "Invalid settings for work load. "
+ slices = _split_input_slice(self.batch_size, work_load_list)
+
+ data_list = []
+ label_list = []
+ for islice in slices:
+ iroidb = [roidb[i] for i in range(islice.start, islice.stop)]
+ data, label = minibatch.get_minibatch(iroidb, self.num_classes, self.mode)
+ data_list.append(data)
+ label_list.append(label)
+
+ # pad data first and then assign anchor (read label)
+ data_tensor = tensor_vstack([batch['data'] for batch in data_list])
+ for data, data_pad in zip(data_list, data_tensor):
+ data['data'] = data_pad[np.newaxis, :]
+
+ new_label_list = []
+ for data, label in zip(data_list, label_list):
+ # infer label shape
+ data_shape = {k: v.shape for k, v in data.items()}
+ del data_shape['im_info']
+ _, feat_shape, _ = self.feat_sym.infer_shape(**data_shape)
+ feat_shape = [int(i) for i in feat_shape[0]]
+
+ # assign anchor for label
+ label = minibatch.assign_anchor(feat_shape, label['gt_boxes'], data['im_info'],
+ self.feat_stride, self.anchor_scales,
+ self.anchor_ratios, self.allowed_border)
+ del data['im_info']
+ new_label_list.append(label)
+
+ all_data = dict()
+ for key in ['data']:
+ all_data[key] = tensor_vstack([batch[key] for batch in data_list])
+
+ all_label = dict()
+ all_label['label'] = tensor_vstack([batch['label'] for batch in new_label_list], pad=-1)
+ for key in ['bbox_target', 'bbox_inside_weight', 'bbox_outside_weight']:
+ all_label[key] = tensor_vstack([batch[key] for batch in new_label_list])
+
+ self.data = [mx.nd.array(all_data['data'])]
+
+ self.label = [mx.nd.array(all_label['label']),
+ mx.nd.array(all_label['bbox_target']),
+ mx.nd.array(all_label['bbox_inside_weight']),
+ mx.nd.array(all_label['bbox_outside_weight'])]
diff --git a/example/rcnn/rcnn/metric.py b/example/rcnn/rcnn/metric.py
index c31e5533c04b..b8bd90875604 100644
--- a/example/rcnn/rcnn/metric.py
+++ b/example/rcnn/rcnn/metric.py
@@ -4,14 +4,52 @@
from rcnn.config import config
+class AccuracyMetric(mx.metric.EvalMetric):
+ def __init__(self, use_ignore=False, ignore=None):
+ super(AccuracyMetric, self).__init__('Accuracy')
+ self.use_ignore = use_ignore
+ self.ignore = ignore
+ self.has_rpn = config.TRAIN.HAS_RPN
+ if self.has_rpn:
+ assert self.use_ignore and self.ignore is not None
+
+ def update(self, labels, preds):
+ if self.has_rpn:
+ pred_label = mx.ndarray.argmax_channel(preds[0]).asnumpy().astype('int32')
+ label = labels[0].asnumpy().astype('int32')
+ non_ignore_inds = np.where(label != self.ignore)
+ pred_label = pred_label[non_ignore_inds]
+ label = label[non_ignore_inds]
+ else:
+ last_dim = preds[0].shape[-1]
+ pred_label = preds[0].asnumpy().reshape(-1, last_dim).argmax(axis=1).astype('int32')
+ label = labels[0].asnumpy().reshape(-1,).astype('int32')
+
+ self.sum_metric += (pred_label.flat == label.flat).sum()
+ self.num_inst += len(pred_label.flat)
+
+
class LogLossMetric(mx.metric.EvalMetric):
- def __init__(self):
+ def __init__(self, use_ignore=False, ignore=None):
super(LogLossMetric, self).__init__('LogLoss')
+ self.use_ignore = use_ignore
+ self.ignore = ignore
+ self.has_rpn = config.TRAIN.HAS_RPN
+ if self.has_rpn:
+ assert self.use_ignore and self.ignore is not None
def update(self, labels, preds):
- pred_cls = preds[0].asnumpy()
- label = labels[0].asnumpy().astype('int32')
- cls = pred_cls[np.arange(label.shape[0]), label]
+ if self.has_rpn:
+ pred_cls = preds[0].asnumpy()[0]
+ label = labels[0].asnumpy().astype('int32')[0]
+ non_ignore_inds = np.where(label != self.ignore)[0]
+ label = label[non_ignore_inds]
+ cls = pred_cls[label, non_ignore_inds]
+ else:
+ last_dim = preds[0].shape[-1]
+ pred_cls = preds[0].asnumpy().reshape(-1, last_dim)
+ label = labels[0].asnumpy().reshape(-1,).astype('int32')
+ cls = pred_cls[np.arange(label.shape[0]), label]
cls += config.EPS
cls_loss = -1 * np.log(cls)
cls_loss = np.sum(cls_loss)
@@ -22,22 +60,15 @@ def update(self, labels, preds):
class SmoothL1LossMetric(mx.metric.EvalMetric):
def __init__(self):
super(SmoothL1LossMetric, self).__init__('SmoothL1Loss')
+ self.has_rpn = config.TRAIN.HAS_RPN
def update(self, labels, preds):
bbox_loss = preds[1].asnumpy()
- label = labels[1].asnumpy()
+ if self.has_rpn:
+ bbox_loss = bbox_loss.reshape((bbox_loss.shape[0], -1))
+ else:
+ first_dim = bbox_loss.shape[0] * bbox_loss.shape[1]
+ bbox_loss = bbox_loss.reshape(first_dim, -1)
+ self.num_inst += bbox_loss.shape[0]
bbox_loss = np.sum(bbox_loss)
self.sum_metric += bbox_loss
- self.num_inst += label.shape[0]
-
-
-class Accuracy(mx.metric.EvalMetric):
- def __init__(self):
- super(Accuracy, self).__init__('accuracy')
-
- def update(self, labels, preds):
- pred_label = mx.ndarray.argmax_channel(preds[0]).asnumpy().astype('int32')
- label = labels[0].asnumpy().astype('int32')
-
- self.sum_metric += (pred_label.flat == label.flat).sum()
- self.num_inst += len(pred_label.flat)
diff --git a/example/rcnn/rcnn/minibatch.py b/example/rcnn/rcnn/minibatch.py
index b160ff96700b..920d27eef22b 100644
--- a/example/rcnn/rcnn/minibatch.py
+++ b/example/rcnn/rcnn/minibatch.py
@@ -1,18 +1,24 @@
"""
To construct data iterator from imdb, batch sampling procedure are defined here
-training minibatch =
+RPN:
+data =
{'data': [num_images, c, h, w],
- 'rois': [num_rois, 5],
- 'labels': [num_rois],
- 'bbox_targets': [num_rois, 4 * num_classes],
- 'bbox_inside_weights': [num_rois, 4 * num_classes],
- 'bbox_outside_weights': [num_rois, 4 * num_classes]}
- num_images should divide config['TRAIN_BATCH_SIZE'] and num_rois = config['TRAIN_BATCH_SIZE'] / num_images
-validation minibatch is similar except num_images = 1 and num_rois = all rois
-testing minibatch =
+ 'im_info': [num_images, 4] (optional)}
+label =
+prototype: {'gt_boxes': [num_boxes, 5]}
+final: {'label': [batch_size, 1] <- [batch_size, num_anchors, feat_height, feat_width],
+ 'bbox_target': [batch_size, num_anchors, feat_height, feat_width],
+ 'bbox_inside_weight': [batch_size, num_anchors, feat_height, feat_width],
+ 'bbox_outside_weight': [batch_size, num_anchors, feat_height, feat_width]}
+Fast R-CNN:
+data =
{'data': [num_images, c, h, w],
- 'rois': [num_rois, 5]}
- num_images = 1 and num_rois = all rois
+ 'rois': [num_images, num_rois, 5]}
+label =
+ {'label': [num_images, num_rois],
+ 'bbox_target': [num_images, num_rois, 4 * num_classes],
+ 'bbox_inside_weight': [num_images, num_rois, 4 * num_classes],
+ 'bbox_outside_weight': [num_images, num_rois, 4 * num_classes]}
"""
import cv2
@@ -21,85 +27,101 @@
from helper.processing import image_processing
from helper.processing.bbox_regression import expand_bbox_regression_targets
+from helper.processing.generate_anchor import generate_anchors
+from helper.processing.bbox_regression import bbox_overlaps
+from helper.processing.bbox_transform import bbox_transform
from rcnn.config import config
-def get_minibatch(roidb, num_classes, ctx):
+
+def get_minibatch(roidb, num_classes, mode='test'):
"""
return minibatch of images in roidb
- :param roidb: subset of main database
- :param num_classes: number of classes is used in bbox regression targets
- :return: minibatch: {'data', 'rois', 'labels', 'bbox_targets', 'bbox_inside_weights', 'bbox_outside_weights'}
- """
- num_images = len(roidb)
- random_scale_indexes = npr.randint(0, high=len(config.TRAIN.SCALES), size=num_images)
- assert config.TRAIN.BATCH_SIZE % num_images == 0, \
- 'num_images {} must devide BATCHSIZE {}'.format(num_images, config.TRAIN.BATCH_SIZE)
- rois_per_image = config.TRAIN.BATCH_SIZE / num_images
- fg_rois_per_image = np.round(config.TRAIN.FG_FRACTION * rois_per_image).astype(int)
-
- # im_array: [num_images, c, h, w]
- im_array, im_scales = get_image_array(roidb, config.TRAIN.SCALES, random_scale_indexes)
- rois_array = list()
- labels_array = list()
- bbox_targets_array = list()
- bbox_inside_array = list()
-
- for im_i in range(num_images):
- im_rois, labels, bbox_targets, bbox_inside_weights, overlaps = \
- sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes)
-
- # project im_rois
- # do not round roi
- rois = im_rois * im_scales[im_i]
- batch_index = im_i * np.ones((rois.shape[0], 1))
- rois_array_this_image = np.hstack((batch_index, rois))
- rois_array.append(rois_array_this_image)
-
- # add labels
- labels_array.append(labels)
- bbox_targets_array.append(bbox_targets)
- bbox_inside_array.append(bbox_inside_weights)
-
- rois_array = np.vstack(rois_array)
- labels_array = np.hstack(labels_array)
- bbox_targets_array = np.vstack(bbox_targets_array)
- bbox_inside_array = np.vstack(bbox_inside_array)
- bbox_outside_array = np.array(bbox_inside_array > 0).astype(np.float32)
-
- minibatch = {'data': im_array,
- 'rois': rois_array,
- 'labels': labels_array,
- 'bbox_targets': bbox_targets_array,
- 'bbox_inside_weights': bbox_inside_array,
- 'bbox_outside_weights': bbox_outside_array}
- return minibatch
-
-
-def get_testbatch(roidb, num_classes):
- """
- return test batch of given roidb
- actually, there is only one testing scale and len(roidb) is 1
- :param roidb: subset of main database
+ :param roidb: a list of dict, whose length controls batch size
:param num_classes: number of classes is used in bbox regression targets
- :return: minibatch: {'data', 'rois'}
+ :param mode: controls whether blank label are returned
+ :return: data, label
"""
+ # build im_array: [num_images, c, h, w]
num_images = len(roidb)
- random_scale_indexes = npr.randint(0, high=len(config.TEST.SCALES), size=num_images)
- im_array, im_scales = get_image_array(roidb, config.TEST.SCALES, random_scale_indexes)
+ random_scale_indexes = npr.randint(0, high=len(config.SCALES), size=num_images)
+ im_array, im_scales = get_image_array(roidb, config.SCALES, random_scale_indexes)
+
+ if mode == 'train':
+ cfg_key = 'TRAIN'
+ else:
+ cfg_key = 'TEST'
+
+ if config[cfg_key].HAS_RPN:
+ assert len(roidb) == 1, 'Single batch only'
+ assert len(im_scales) == 1, 'Single batch only'
+ im_info = np.array([[im_array.shape[2], im_array.shape[3], im_scales[0]]], dtype=np.float32)
+
+ data = {'data': im_array,
+ 'im_info': im_info}
+ label = {}
+
+ if mode == 'train':
+ # gt boxes: (x1, y1, x2, y2, cls)
+ gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
+            gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
+ gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
+ gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
+ label = {'gt_boxes': gt_boxes}
+ else:
+ if mode == 'train':
+ assert config.TRAIN.BATCH_SIZE % config.TRAIN.BATCH_IMAGES == 0, \
+            'BATCHIMAGES {} must divide BATCHSIZE {}'.format(config.TRAIN.BATCH_IMAGES, config.TRAIN.BATCH_SIZE)
+ rois_per_image = config.TRAIN.BATCH_SIZE / config.TRAIN.BATCH_IMAGES
+ fg_rois_per_image = np.round(config.TRAIN.FG_FRACTION * rois_per_image).astype(int)
+
+ rois_array = list()
+ labels_array = list()
+ bbox_targets_array = list()
+ bbox_inside_array = list()
+
+ for im_i in range(num_images):
+ im_rois, labels, bbox_targets, bbox_inside_weights, overlaps = \
+ sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes)
- rois_array = list()
- for im_i in range(num_images):
- im_rois = roidb[im_i]['boxes']
- rois = im_rois * im_scales[im_i]
- batch_index = im_i * np.ones((rois.shape[0], 1))
- rois_array_this_image = np.hstack((batch_index, rois))
- rois_array.append(rois_array_this_image)
+ # project im_rois
+ # do not round roi
+ rois = im_rois * im_scales[im_i]
+ batch_index = im_i * np.ones((rois.shape[0], 1))
+ rois_array_this_image = np.hstack((batch_index, rois))
+ rois_array.append(rois_array_this_image)
- rois_array = np.vstack(rois_array)
+ # add labels
+ labels_array.append(labels)
+ bbox_targets_array.append(bbox_targets)
+ bbox_inside_array.append(bbox_inside_weights)
- testbatch = {'data': im_array,
- 'rois': rois_array}
- return testbatch
+ rois_array = np.array(rois_array)
+ labels_array = np.array(labels_array)
+ bbox_targets_array = np.array(bbox_targets_array)
+ bbox_inside_array = np.array(bbox_inside_array)
+ bbox_outside_array = np.array(bbox_inside_array > 0).astype(np.float32)
+
+ data = {'data': im_array,
+ 'rois': rois_array}
+ label = {'label': labels_array,
+ 'bbox_target': bbox_targets_array,
+ 'bbox_inside_weight': bbox_inside_array,
+ 'bbox_outside_weight': bbox_outside_array}
+ else:
+ rois_array = list()
+ for im_i in range(num_images):
+ im_rois = roidb[im_i]['boxes']
+ rois = im_rois * im_scales[im_i]
+ batch_index = im_i * np.ones((rois.shape[0], 1))
+ rois_array_this_image = np.hstack((batch_index, rois))
+ rois_array.append(rois_array_this_image)
+ rois_array = np.vstack(rois_array)
+
+ data = {'data': im_array,
+ 'rois': rois_array}
+ label = {}
+
+ return data, label
def get_image_array(roidb, scales, scale_indexes):
@@ -118,7 +140,7 @@ def get_image_array(roidb, scales, scale_indexes):
if roidb[i]['flipped']:
im = im[:, ::-1, :]
target_size = scales[scale_indexes[i]]
- im, im_scale = image_processing.resize(im, target_size, config.TRAIN.MAX_SIZE)
+ im, im_scale = image_processing.resize(im, target_size, config.MAX_SIZE)
im_tensor = image_processing.transform(im, config.PIXEL_MEANS)
processed_ims.append(im_tensor)
im_scales.append(im_scale)
@@ -177,3 +199,193 @@ def sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
expand_bbox_regression_targets(roidb['bbox_targets'][keep_indexes, :], num_classes)
return rois, labels, bbox_targets, bbox_inside_weights, overlaps
+
+
+def assign_anchor(feat_shape, gt_boxes, im_info, feat_stride=16,
+ scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
+ """
+ assign ground truth boxes to anchor positions
+ :param feat_shape: infer output shape
+ :param gt_boxes: assign ground truth
+ :param im_info: filter out anchors overlapped with edges
+ :param feat_stride: anchor position step
+ :param scales: used to generate anchors, affects num_anchors (per location)
+ :param ratios: aspect ratios of generated anchors
+ :param allowed_border: filter out anchors with edge overlap > allowed_border
+ :return: dict of label
+ 'label': of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width)
+ 'bbox_target': of shape (batch_size, num_anchors * 4, feat_height, feat_width)
+ 'bbox_inside_weight': *todo* mark the assigned anchors
+ 'bbox_outside_weight': used to normalize the bbox_loss, all weights sums to RPN_POSITIVE_WEIGHT
+ """
+ def _unmap(data, count, inds, fill=0):
+        """ unmap a subset inds of data into original data of size count """
+ if len(data.shape) == 1:
+ ret = np.empty((count,), dtype=np.float32)
+ ret.fill(fill)
+ ret[inds] = data
+ else:
+ ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
+ ret.fill(fill)
+ ret[inds, :] = data
+ return ret
+
+ def _compute_targets(ex_rois, gt_rois):
+ """ compute bbox targets for an image """
+ assert ex_rois.shape[0] == gt_rois.shape[0]
+ assert ex_rois.shape[1] == 4
+ assert gt_rois.shape[1] == 5
+
+ return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
+
+ DEBUG = False
+ im_info = im_info[0]
+ scales = np.array(scales, dtype=np.float32)
+ base_anchors = generate_anchors(base_size=16, ratios=list(ratios), scales=scales)
+ num_anchors = base_anchors.shape[0]
+ feat_height, feat_width = feat_shape[-2:]
+
+ if DEBUG:
+ print 'anchors:'
+ print base_anchors
+ print 'anchor shapes:'
+ print np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
+ base_anchors[:, 3::4] - base_anchors[:, 1::4]))
+ print 'im_info', im_info
+ print 'height', feat_height, 'width', feat_width
+ print 'gt_boxes shape', gt_boxes.shape
+ print 'gt_boxes', gt_boxes
+
+ # 1. generate proposals from bbox deltas and shifted anchors
+ shift_x = np.arange(0, feat_width) * feat_stride
+ shift_y = np.arange(0, feat_height) * feat_stride
+ shift_x, shift_y = np.meshgrid(shift_x, shift_y)
+ shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
+ # add A anchors (1, A, 4) to
+ # cell K shifts (K, 1, 4) to get
+ # shift anchors (K, A, 4)
+ # reshape to (K*A, 4) shifted anchors
+ A = num_anchors
+ K = shifts.shape[0]
+ all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
+ all_anchors = all_anchors.reshape((K * A, 4))
+ total_anchors = int(K * A)
+
+ # only keep anchors inside the image
+ inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
+ (all_anchors[:, 1] >= -allowed_border) &
+ (all_anchors[:, 2] < im_info[1] + allowed_border) &
+ (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
+ if DEBUG:
+ print 'total_anchors', total_anchors
+ print 'inds_inside', len(inds_inside)
+
+ # keep only inside anchors
+ anchors = all_anchors[inds_inside, :]
+ if DEBUG:
+ print 'anchors shape', anchors.shape
+
+ # label: 1 is positive, 0 is negative, -1 is dont care
+ labels = np.empty((len(inds_inside),), dtype=np.float32)
+ labels.fill(-1)
+
+ if gt_boxes.size > 0:
+ # overlap between the anchors and the gt boxes
+ # overlaps (ex, gt)
+ overlaps = bbox_overlaps(anchors.astype(np.float), gt_boxes.astype(np.float))
+ argmax_overlaps = overlaps.argmax(axis=1)
+ max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
+ gt_argmax_overlaps = overlaps.argmax(axis=0)
+ gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
+ gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
+
+ if not config.TRAIN.RPN_CLOBBER_POSITIVES:
+ # assign bg labels first so that positive labels can clobber them
+ labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
+
+ # fg label: for each gt, anchor with highest overlap
+ labels[gt_argmax_overlaps] = 1
+
+ # fg label: above threshold IoU
+ labels[max_overlaps >= config.TRAIN.RPN_POSITIVE_OVERLAP] = 1
+
+ if config.TRAIN.RPN_CLOBBER_POSITIVES:
+ # assign bg labels last so that negative labels can clobber positives
+ labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
+ else:
+ labels[:] = 0
+
+ # subsample positive labels if we have too many
+ num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE)
+ fg_inds = np.where(labels == 1)[0]
+ if len(fg_inds) > num_fg:
+ disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
+ if DEBUG:
+ disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
+ labels[disable_inds] = -1
+
+ # subsample negative labels if we have too many
+ num_bg = config.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
+ bg_inds = np.where(labels == 0)[0]
+ if len(bg_inds) > num_bg:
+ disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
+ if DEBUG:
+ disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
+ labels[disable_inds] = -1
+
+ bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
+ if gt_boxes.size > 0:
+ bbox_targets[:] = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
+
+ bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
+ bbox_inside_weights[labels == 1, :] = np.array(config.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
+
+ bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
+ if config.TRAIN.RPN_POSITIVE_WEIGHT < 0:
+ # uniform weighting of exampling (given non-uniform sampling)
+ num_examples = np.sum(labels >= 0)
+ positive_weights = np.ones((1, 4)) * 1.0 / num_examples
+ negative_weights = np.ones((1, 4)) * 1.0 / num_examples
+ else:
+        assert ((config.TRAIN.RPN_POSITIVE_WEIGHT > 0) & (config.TRAIN.RPN_POSITIVE_WEIGHT < 1))
+        positive_weights = config.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)
+        negative_weights = (1.0 - config.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)
+ bbox_outside_weights[labels == 1, :] = positive_weights
+ bbox_outside_weights[labels == 0, :] = negative_weights
+
+ if DEBUG:
+ _sums = bbox_targets[labels == 1, :].sum(axis=0)
+ _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
+ _counts = config.EPS + np.sum(labels == 1)
+ means = _sums / _counts
+ stds = np.sqrt(_squared_sums / _counts - means ** 2)
+ print 'means', means
+ print 'stdevs', stds
+
+ # map up to original set of anchors
+ labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
+ bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
+ bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
+ bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
+
+ if DEBUG:
+ print 'rpn: max max_overlaps', np.max(max_overlaps)
+ print 'rpn: num_positives', np.sum(labels == 1)
+ print 'rpn: num_negatives', np.sum(labels == 0)
+ _fg_sum = np.sum(labels == 1)
+ _bg_sum = np.sum(labels == 0)
+ _count = 1
+ print 'rpn: num_positive avg', _fg_sum / _count
+ print 'rpn: num_negative avg', _bg_sum / _count
+
+ labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
+ labels = labels.reshape((1, A * feat_height * feat_width))
+ bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
+ bbox_inside_weights = bbox_inside_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
+ bbox_outside_weights = bbox_outside_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
+
+ label = {'label': labels,
+ 'bbox_target': bbox_targets,
+ 'bbox_inside_weight': bbox_inside_weights,
+ 'bbox_outside_weight': bbox_outside_weights}
+ return label
diff --git a/example/rcnn/rcnn/module.py b/example/rcnn/rcnn/module.py
new file mode 100644
index 000000000000..6b5aef1d3d51
--- /dev/null
+++ b/example/rcnn/rcnn/module.py
@@ -0,0 +1,195 @@
+"""A `MutableModule` implement the `BaseModule` API, and allows input shape
+varying with training iterations. If shapes vary, executors will rebind,
+using shared arrays from the initial module binded with maximum shape.
+"""
+
+import logging
+
+from mxnet import context as ctx
+from mxnet.initializer import Uniform
+from mxnet.module.base_module import BaseModule
+from mxnet.module.module import Module
+
+class MutableModule(BaseModule):
+ """A mutable module is a module that supports variable input data.
+
+ Parameters
+ ----------
+ symbol : Symbol
+ data_names : list of str
+ label_names : list of str
+ logger : Logger
+ context : Context or list of Context
+ work_load_list : list of number
+ max_data_shapes : list of (name, shape) tuple, designating inputs whose shape vary
+ max_label_shapes : list of (name, shape) tuple, designating inputs whose shape vary
+ fixed_param_prefix : list of str, indicating fixed parameters
+ """
+ def __init__(self, symbol, data_names, label_names,
+ logger=logging, context=ctx.cpu(), work_load_list=None,
+ max_data_shapes=None, max_label_shapes=None, fixed_param_prefix=None):
+ super(MutableModule, self).__init__(logger=logger)
+ self._symbol = symbol
+ self._data_names = data_names
+ self._label_names = label_names
+ self._context = context
+ self._work_load_list = work_load_list
+
+ self._curr_module = None
+ self._max_data_shapes = max_data_shapes
+ self._max_label_shapes = max_label_shapes
+ self._fixed_param_prefix = fixed_param_prefix
+
+ if self._max_data_shapes is None:
+ self._max_data_shapes = []
+ if self._max_label_shapes is None:
+ self._max_label_shapes = []
+ if self._fixed_param_prefix is None:
+ self._fixed_param_prefix = []
+
+ fixed_param_names = list()
+ for name in self._symbol.list_arguments():
+ for prefix in self._fixed_param_prefix:
+ if prefix in name:
+ fixed_param_names.append(name)
+ self._fixed_param_names = fixed_param_names
+
+ def _reset_bind(self):
+ self.binded = False
+ self._curr_module = None
+
+ @property
+ def data_names(self):
+ return self._data_names
+
+ @property
+ def output_names(self):
+ return self._symbol.list_outputs()
+
+ @property
+ def data_shapes(self):
+ assert self.binded
+ return self._curr_module.data_shapes
+
+ @property
+ def label_shapes(self):
+ assert self.binded
+ return self._curr_module.label_shapes
+
+ @property
+ def output_shapes(self):
+ assert self.binded
+ return self._curr_module.output_shapes
+
+ def get_params(self):
+ assert self.binded and self.params_initialized
+ return self._curr_module.get_params()
+
+ def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None,
+ allow_missing=False, force_init=False):
+ if self.params_initialized and not force_init:
+ return
+ assert self.binded, 'call bind before initializing the parameters'
+ self._curr_module.init_params(initializer=initializer, arg_params=arg_params,
+ aux_params=aux_params, allow_missing=allow_missing,
+ force_init=force_init)
+ self.params_initialized = True
+
+ def bind(self, data_shapes, label_shapes=None, for_training=True,
+ inputs_need_grad=False, force_rebind=False, shared_module=None):
+ # in case we already initialized params, keep it
+ if self.params_initialized:
+ arg_params, aux_params = self.get_params()
+
+ # force rebinding is typically used when one want to switch from
+ # training to prediction phase.
+ if force_rebind:
+ self._reset_bind()
+
+ if self.binded:
+ self.logger.warning('Already binded, ignoring bind()')
+ return
+
+ assert shared_module is None, 'shared_module for MutableModule is not supported'
+
+ self.for_training = for_training
+ self.inputs_need_grad = inputs_need_grad
+ self.binded = True
+
+ max_shapes_dict = dict(self._max_data_shapes + self._max_label_shapes)
+ max_data_shapes = list()
+ for name, shape in data_shapes:
+ if name in max_shapes_dict:
+ max_data_shapes.append((name, max_shapes_dict[name]))
+ else:
+ max_data_shapes.append((name, shape))
+ max_label_shapes = list()
+ for name, shape in label_shapes:
+ if name in max_shapes_dict:
+ max_label_shapes.append((name, max_shapes_dict[name]))
+ else:
+ max_label_shapes.append((name, shape))
+
+ module = Module(self._symbol, self._data_names, self._label_names, logger=self.logger,
+ context=self._context, work_load_list=self._work_load_list,
+ fixed_param_names=self._fixed_param_names)
+ module.bind(max_data_shapes, max_label_shapes, for_training, inputs_need_grad,
+ force_rebind=False, shared_module=None)
+ self._curr_module = module
+
+ # copy back saved params, if already initialized
+ if self.params_initialized:
+ self.set_params(arg_params, aux_params)
+
+ def init_optimizer(self, kvstore='local', optimizer='sgd',
+ optimizer_params=(('learning_rate', 0.01),), force_init=False):
+ assert self.binded and self.params_initialized
+ if self.optimizer_initialized and not force_init:
+ self.logger.warning('optimizer already initialized, ignoring.')
+ return
+
+ self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params,
+ force_init=force_init)
+ self.optimizer_initialized = True
+
+ def forward(self, data_batch, is_train=None):
+ assert self.binded and self.params_initialized
+
+ shape_changed = False
+ current_shapes = dict(self._curr_module.data_shapes + self._curr_module.label_shapes)
+ input_shapes = dict(data_batch.provide_data + data_batch.provide_label)
+ for k, v in current_shapes.items():
+ if v != input_shapes[k]:
+ shape_changed = True
+
+ if shape_changed:
+ module = Module(self._symbol, self._data_names, self._label_names,
+ logger=self.logger, context=self._context,
+ work_load_list=self._work_load_list,
+ fixed_param_names=self._fixed_param_names)
+ module.bind(data_batch.provide_data, data_batch.provide_label, self._curr_module.for_training,
+ self._curr_module.inputs_need_grad, force_rebind=False,
+ shared_module=self._curr_module)
+ self._curr_module = module
+
+ self._curr_module.forward(data_batch, is_train=is_train)
+
+ def backward(self, out_grads=None):
+ assert self.binded and self.params_initialized
+ self._curr_module.backward(out_grads=out_grads)
+
+ def update(self):
+ assert self.binded and self.params_initialized and self.optimizer_initialized
+ self._curr_module.update()
+
+ def get_outputs(self, merge_multi_context=True):
+ assert self.binded and self.params_initialized
+ return self._curr_module.get_outputs(merge_multi_context=merge_multi_context)
+
+ def get_input_grads(self, merge_multi_context=True):
+ assert self.binded and self.params_initialized and self.inputs_need_grad
+ return self._curr_module.get_input_grads(merge_multi_context=merge_multi_context)
+
+ def update_metric(self, eval_metric, labels):
+ assert self.binded and self.params_initialized
+ self._curr_module.update_metric(eval_metric, labels)
diff --git a/example/rcnn/rcnn/rpn/__init__.py b/example/rcnn/rcnn/rpn/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/example/rcnn/rcnn/rpn/generate.py b/example/rcnn/rcnn/rpn/generate.py
new file mode 100644
index 000000000000..f1c8ddbef2e3
--- /dev/null
+++ b/example/rcnn/rcnn/rpn/generate.py
@@ -0,0 +1,116 @@
+import mxnet as mx
+import numpy as np
+import os
+import cPickle
+
+
+class Detector(object):
+ def __init__(self, symbol, ctx=None,
+ arg_params=None, aux_params=None):
+ self.symbol = symbol
+ self.ctx = ctx
+ if self.ctx is None:
+ self.ctx = mx.cpu()
+ self.executor = None
+ self.arg_params = arg_params
+ self.aux_params = aux_params
+
+ def im_detect(self, im, im_info):
+ """
+ perform detection of im, im_info
+ :param im: numpy.ndarray [b, c, h, w]
+ :param im_info: numpy.ndarray [b, 3]
+ :return: boxes [b, 5], scores [b,]
+ """
+ self.arg_params['data'] = mx.nd.array(im, self.ctx)
+ self.arg_params['im_info'] = mx.nd.array(im_info, self.ctx)
+ arg_shapes, out_shapes, aux_shapes = \
+ self.symbol.infer_shape(data=self.arg_params['data'].shape, im_info=self.arg_params['im_info'].shape)
+ aux_names = self.symbol.list_auxiliary_states()
+ self.aux_params = {k: mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}
+ self.executor = self.symbol.bind(self.ctx, self.arg_params, args_grad=None,
+ grad_req='null', aux_states=self.aux_params)
+ output_dict = {name: nd for name, nd in zip(self.symbol.list_outputs(), self.executor.outputs)}
+
+ self.executor.forward(is_train=False)
+ boxes = output_dict['rois_output'].asnumpy()
+ scores = output_dict['rois_score'].asnumpy()
+
+ return boxes, scores
+
+
+def generate_detections(detector, test_data, imdb, vis=False):
+ """
+ Generate detections results using RPN.
+ :param detector: Detector
+ :param test_data: data iterator, must be non-shuffled
+ :param imdb: image database
+ :param vis: controls visualization
+ :return: list of detected boxes
+ """
+ assert not test_data.shuffle
+
+ i = 0
+ imdb_boxes = list()
+ for databatch in test_data:
+ if i % 10 == 0:
+ print 'generating detections {}/{}'.format(i, imdb.num_images)
+
+ boxes, scores = detector.im_detect(databatch.data['data'], databatch.data['im_info'])
+ scale = databatch.data['im_info'][0, 2]
+ # drop the batch index
+ boxes = boxes[:, 1:].copy() / scale
+ imdb_boxes.append(boxes)
+ if vis:
+ dets = np.hstack((boxes * scale, scores))
+ vis_detection(databatch.data['data'], dets, thresh=0.9)
+ i += 1
+
+ assert len(imdb_boxes) == imdb.num_images, 'calculations not complete'
+ rpn_folder = os.path.join(imdb.root_path, 'rpn_data')
+ if not os.path.exists(rpn_folder):
+ os.mkdir(rpn_folder)
+ rpn_file = os.path.join(rpn_folder, imdb.name + '_rpn.pkl')
+ with open(rpn_file, 'wb') as f:
+ cPickle.dump(imdb_boxes, f, cPickle.HIGHEST_PROTOCOL)
+ print 'wrote rpn proposals to {}'.format(rpn_file)
+ return imdb_boxes
+
+
+def vis_detection(im, dets, thresh=0.):
+ """
+ draw detected bounding boxes
+    :param im: [b, c, h, w] in RGB
+ :param dets: only one class, [N * [4 coordinates score]]
+ :param thresh: thresh for valid detections
+ :return:
+ """
+ from rcnn.config import config
+ from helper.processing.image_processing import transform_inverse
+ import matplotlib.pyplot as plt
+ inds = np.where(dets[:, -1] >= thresh)[0]
+ if len(inds) == 0:
+ return
+ inds = np.argsort(dets[:, -1])[::-1]
+ inds = inds[:20]
+
+ class_name = 'obj'
+ fig, ax = plt.subplots(figsize=(12, 12))
+ im = transform_inverse(im, config.PIXEL_MEANS)
+ ax.imshow(im, aspect='equal')
+ for i in inds:
+ bbox = dets[i, :4]
+ score = dets[i, -1]
+ rect = plt.Rectangle((bbox[0], bbox[1]),
+ bbox[2] - bbox[0],
+ bbox[3] - bbox[1], fill=False,
+ edgecolor='red', linewidth=3.5)
+ ax.add_patch(rect)
+ ax.text(bbox[0], bbox[1] - 2,
+ '{:s} {:3f}'.format(class_name, score),
+ bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white')
+ ax.set_title('{} detections with p({} | box) >= {:.1f}'.format(class_name, class_name, thresh), fontsize=14)
+ plt.axis('off')
+ plt.tight_layout()
+ plt.draw()
+ plt.show()
diff --git a/example/rcnn/rcnn/rpn/proposal.py b/example/rcnn/rcnn/rpn/proposal.py
new file mode 100644
index 000000000000..b0303c5cfd84
--- /dev/null
+++ b/example/rcnn/rcnn/rpn/proposal.py
@@ -0,0 +1,206 @@
+"""
+Proposal Operator transform anchor coordinates into ROI coordinates with prediction results on
+classification probability and bounding box prediction results, and image size and scale information.
+"""
+
+import mxnet as mx
+import numpy as np
+import numpy.random as npr
+
+from rcnn.config import config
+from helper.processing.generate_anchor import generate_anchors
+from helper.processing.bbox_transform import bbox_pred, clip_boxes
+from helper.processing.nms import nms
+
+DEBUG = False
+
+
+class ProposalOperator(mx.operator.CustomOp):
+ def __init__(self, feat_stride, scales, ratios, is_train=False, output_score=False):
+ super(ProposalOperator, self).__init__()
+ self._feat_stride = float(feat_stride)
+ self._scales = np.fromstring(scales[1:-1], dtype=float, sep=',')
+ self._ratios = np.fromstring(ratios[1:-1], dtype=float, sep=',').tolist()
+ self._anchors = generate_anchors(base_size=self._feat_stride, scales=self._scales, ratios=self._ratios)
+ self._num_anchors = self._anchors.shape[0]
+ self._output_score = output_score
+
+ if DEBUG:
+ print 'feat_stride: {}'.format(self._feat_stride)
+ print 'anchors:'
+ print self._anchors
+
+ if is_train:
+ self.cfg_key = 'TRAIN'
+ else:
+ self.cfg_key = 'TEST'
+
+ def forward(self, is_train, req, in_data, out_data, aux):
+ # for each (H, W) location i
+ # generate A anchor boxes centered on cell i
+ # apply predicted bbox deltas at cell i to each of the A anchors
+ # clip predicted boxes to image
+ # remove predicted boxes with either height or width < threshold
+ # sort all (proposal, score) pairs by score from highest to lowest
+ # take top pre_nms_topN proposals before NMS
+ # apply NMS with threshold 0.7 to remaining proposals
+ # take after_nms_topN proposals after NMS
+ # return the top proposals (-> RoIs top, scores top)
+
+ pre_nms_topN = config[self.cfg_key].RPN_PRE_NMS_TOP_N
+ post_nms_topN = config[self.cfg_key].RPN_POST_NMS_TOP_N
+ nms_thresh = config[self.cfg_key].RPN_NMS_THRESH
+ min_size = config[self.cfg_key].RPN_MIN_SIZE
+
+ # the first set of anchors are background probabilities
+ # keep the second part
+ scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
+ bbox_deltas = in_data[1].asnumpy()
+ im_info = in_data[2].asnumpy()[0, :]
+
+ if DEBUG:
+ print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
+ print 'scale: {}'.format(im_info[2])
+
+ # 1. Generate proposals from bbox_deltas and shifted anchors
+ height, width = scores.shape[-2:]
+
+ if DEBUG:
+ print 'score map size: {}'.format(scores.shape)
+
+ # Enumerate all shifts
+ shift_x = np.arange(0, width) * self._feat_stride
+ shift_y = np.arange(0, height) * self._feat_stride
+ shift_x, shift_y = np.meshgrid(shift_x, shift_y)
+ shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
+
+ # Enumerate all shifted anchors:
+ #
+ # add A anchors (1, A, 4) to
+ # cell K shifts (K, 1, 4) to get
+ # shift anchors (K, A, 4)
+ # reshape to (K*A, 4) shifted anchors
+ A = self._num_anchors
+ K = shifts.shape[0]
+ anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
+ anchors = anchors.reshape((K * A, 4))
+
+ # Transpose and reshape predicted bbox transformations to get them
+ # into the same order as the anchors:
+ #
+ # bbox deltas will be (1, 4 * A, H, W) format
+ # transpose to (1, H, W, 4 * A)
+ # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
+ # in slowest to fastest order
+ bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
+
+ # Same story for the scores:
+ #
+ # scores are (1, A, H, W) format
+ # transpose to (1, H, W, A)
+ # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
+ scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
+
+ # Convert anchors into proposals via bbox transformations
+ proposals = bbox_pred(anchors, bbox_deltas)
+
+ # 2. clip predicted boxes to image
+ proposals = clip_boxes(proposals, im_info[:2])
+
+ # 3. remove predicted boxes with either height or width < threshold
+ # (NOTE: convert min_size to input image scale stored in im_info[2])
+ keep = ProposalOperator._filter_boxes(proposals, min_size * im_info[2])
+ proposals = proposals[keep, :]
+ scores = scores[keep]
+
+ # 4. sort all (proposal, score) pairs by score from highest to lowest
+ # 5. take top pre_nms_topN (e.g. 6000)
+ order = scores.ravel().argsort()[::-1]
+ if pre_nms_topN > 0:
+ order = order[:pre_nms_topN]
+ proposals = proposals[order, :]
+ scores = scores[order]
+
+ # 6. apply nms (e.g. threshold = 0.7)
+ # 7. take after_nms_topN (e.g. 300)
+ # 8. return the top proposals (-> RoIs top)
+ keep = nms(np.hstack((proposals, scores)), nms_thresh)
+ if post_nms_topN > 0:
+ keep = keep[:post_nms_topN]
+ # pad to ensure output size remains unchanged
+ if len(keep) < post_nms_topN:
+ pad = npr.choice(keep, size=post_nms_topN - len(keep))
+ keep = np.hstack((keep, pad))
+ proposals = proposals[keep, :]
+ scores = scores[keep]
+
+ # Output rois array
+ # Our RPN implementation only supports a single input image, so all
+ # batch inds are 0
+ batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
+ blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
+ self.assign(out_data[0], req[0], blob)
+
+ if self._output_score:
+ self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
+
+ def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
+ pass
+
+ @staticmethod
+ def _filter_boxes(boxes, min_size):
+ """ Remove all boxes with any side smaller than min_size """
+ ws = boxes[:, 2] - boxes[:, 0] + 1
+ hs = boxes[:, 3] - boxes[:, 1] + 1
+ keep = np.where((ws >= min_size) & (hs >= min_size))[0]
+ return keep
+
+
+@mx.operator.register("proposal")
+class ProposalProp(mx.operator.CustomOpProp):
+ def __init__(self, feat_stride, scales, ratios, is_train=False, output_score=False):
+ super(ProposalProp, self).__init__(need_top_grad=False)
+ self._feat_stride = feat_stride
+ self._scales = scales
+ self._ratios = ratios
+ self._is_train = is_train
+ self._output_score = output_score
+
+ if self._is_train:
+ self.cfg_key = 'TRAIN'
+ else:
+ self.cfg_key = 'TEST'
+
+ def list_arguments(self):
+ return ['cls_prob', 'bbox_pred', 'im_info']
+
+ def list_outputs(self):
+ if self._output_score:
+ return ['output', 'score']
+ else:
+ return ['output']
+
+ def infer_shape(self, in_shape):
+ cfg_key = self.cfg_key
+ cls_prob_shape = in_shape[0]
+ bbox_pred_shape = in_shape[1]
+ assert cls_prob_shape[0] == bbox_pred_shape[0], 'ROI number does not equal in cls and reg'
+
+ batch_size = cls_prob_shape[0]
+ if batch_size > 1:
+ raise ValueError("Only single item batches are supported")
+
+ im_info_shape = (batch_size, 3)
+ output_shape = (config[cfg_key].RPN_POST_NMS_TOP_N, 5)
+ score_shape = (config[cfg_key].RPN_POST_NMS_TOP_N, 1)
+
+ if self._output_score:
+ return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape, score_shape]
+ else:
+ return [cls_prob_shape, bbox_pred_shape, im_info_shape], [output_shape]
+
+ def create_operator(self, ctx, shapes, dtypes):
+ return ProposalOperator(self._feat_stride, self._scales, self._ratios, self._is_train, self._output_score)
+
+ def declare_backward_dependency(self, out_grad, in_data, out_data):
+ return []
diff --git a/example/rcnn/rcnn/solver.py b/example/rcnn/rcnn/solver.py
deleted file mode 100644
index f59e9422b1c6..000000000000
--- a/example/rcnn/rcnn/solver.py
+++ /dev/null
@@ -1,87 +0,0 @@
-import mxnet as mx
-import logging
-import metric
-
-from collections import namedtuple
-from callback import Speedometer
-from config import config
-
-class Solver(object):
- def __init__(self, prefix,
- symbol, ctx=None,
- begin_epoch=0, num_epoch=None,
- kv_store='local',
- arg_params=None, aux_params=None,
- optimizer='sgd',
- max_data_shape=None, **kwargs):
- self.prefix = prefix
- self.symbol = symbol
- self.ctx = ctx
- if self.ctx is None:
- self.ctx = mx.cpu()
- self.begin_epoch = begin_epoch
- self.num_epoch = num_epoch
- self.kv_store = kv_store
- self.arg_params = arg_params
- self.aux_params = aux_params
- self.optimizer = optimizer
- self.updater = None
- self.max_data_shape = max_data_shape
- self.kwargs = kwargs.copy()
-
- self.arg_names = None
- self.param_names = None
- self.aux_names = None
-
- def get_params(self, grad_req):
- arg_names = self.symbol.list_arguments()
- self.arg_names = arg_names
- arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=(1, 3, 224, 224), rois=(1, 5))
- if grad_req != 'null':
- param_names = []
- for name, shape in zip(arg_names, arg_shapes):
- if not (name.endswith('data') or name.endswith('rois') or
- name.endswith('inside_weight') or name.endswith('outside_weight') or
- name.endswith('label') or name.endswith('target') or
- name.startswith('conv1') or name.startswith('conv2')):
- param_names.append(name)
- self.param_names = list(param_names)
- aux_names = self.symbol.list_auxiliary_states()
- self.aux_names = aux_names
- self.aux_params = {k: mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}
-
- def fit(self, train_data,
- grad_req='write',
- frequent=20,
- logger=None):
- (kvstore, update_on_kvstore) = mx.model._create_kvstore(
- self.kv_store, len(self.ctx), self.arg_params)
- if logger is None:
- logger = logging
- logger.info('Start training with %s', str(self.ctx))
-
- batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
- epoch_end_callback = mx.callback.do_checkpoint(self.prefix)
-
- self.get_params(grad_req)
-
- eval_metric = metric.Accuracy()
- cls_metric = metric.LogLossMetric()
- bbox_metric = metric.SmoothL1LossMetric()
- eval_metrics = mx.metric.CompositeEvalMetric()
- for child_metric in [eval_metric, cls_metric, bbox_metric]:
- eval_metrics.add(child_metric)
- max_data_shape = self.max_data_shape
-
- self.optimizer = mx.optimizer.create(self.optimizer, rescale_grad=(1.0 / config.TRAIN.BATCH_SIZE), **self.kwargs)
- mx.model._train_multi_device(self.symbol, self.ctx, self.arg_names, self.param_names,
- self.aux_names, self.arg_params, self.aux_params,
- begin_epoch=self.begin_epoch, end_epoch=self.num_epoch,
- epoch_size=None, optimizer=self.optimizer,
- train_data=train_data, eval_data=None,
- eval_metric=eval_metrics,
- epoch_end_callback=epoch_end_callback,
- batch_end_callback=batch_end_callback,
- kvstore=kvstore, update_on_kvstore=update_on_kvstore,
- logger=logger, work_load_list=None, monitor=None,
- mutable_data_shape=True, max_data_shape=self.max_data_shape)
diff --git a/example/rcnn/rcnn/symbol.py b/example/rcnn/rcnn/symbol.py
index dcbbf53ece74..e483fdc4f0b2 100644
--- a/example/rcnn/rcnn/symbol.py
+++ b/example/rcnn/rcnn/symbol.py
@@ -1,18 +1,14 @@
import mxnet as mx
+import rpn.proposal
+from config import config
-def get_symbol_vgg(num_classes=21):
+def get_vgg_conv(data):
"""
- Fast R-CNN with VGG 16 conv layers
- :param num_classes: used to determine output size
+ shared convolutional layers
+ :param data: Symbol
:return: Symbol
"""
- data = mx.symbol.Variable(name="data")
- rois = mx.symbol.Variable(name='rois')
- cls_prob_label = mx.symbol.Variable(name='cls_prob_label')
- bbox_loss_target = mx.symbol.Variable(name='bbox_loss_target')
- bbox_loss_inside_weight = mx.symbol.Variable(name='bbox_loss_inside_weight')
- bbox_loss_outside_weight = mx.symbol.Variable(name='bbox_loss_outside_weight')
# group 1
conv1_1 = mx.symbol.Convolution(
data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
@@ -65,6 +61,34 @@ def get_symbol_vgg(num_classes=21):
conv5_3 = mx.symbol.Convolution(
data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+
+ return relu5_3
+
+
+def get_vgg_rcnn(num_classes=21):
+ """
+ Fast R-CNN with VGG 16 conv layers
+ :param num_classes: used to determine output size
+ :return: Symbol
+ """
+ data = mx.symbol.Variable(name="data")
+ rois = mx.symbol.Variable(name='rois')
+ label = mx.symbol.Variable(name='label')
+ bbox_target = mx.symbol.Variable(name='bbox_target')
+ bbox_inside_weight = mx.symbol.Variable(name='bbox_inside_weight')
+ bbox_outside_weight = mx.symbol.Variable(name='bbox_outside_weight')
+
+ # reshape input
+ rois = mx.symbol.Reshape(data=rois, shape=(-1, 5), name='rois_reshape')
+ label = mx.symbol.Reshape(data=label, shape=(-1, ), name='label_reshape')
+ bbox_target = mx.symbol.Reshape(data=bbox_target, shape=(-1, 4 * num_classes), name='bbox_target_reshape')
+ bbox_inside_weight = mx.symbol.Reshape(data=bbox_inside_weight, shape=(-1, 4 * num_classes), name='bbox_inside_weight_reshape')
+ bbox_outside_weight = mx.symbol.Reshape(data=bbox_outside_weight, shape=(-1, 4 * num_classes), name='bbox_outside_weight_reshape')
+
+ # shared convolutional layers
+ relu5_3 = get_vgg_conv(data)
+
+ # Fast R-CNN
pool5 = mx.symbol.ROIPooling(
name='roi_pool5', data=relu5_3, rois=rois, pooled_size=(7, 7), spatial_scale=0.0625)
# group 6
@@ -78,78 +102,39 @@ def get_symbol_vgg(num_classes=21):
drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
# classification
cls_score = mx.symbol.FullyConnected(name='cls_score', data=drop7, num_hidden=num_classes)
- cls_prob = mx.symbol.SoftmaxOutput(name='cls_prob', data=cls_score, label=cls_prob_label)
+ cls_prob = mx.symbol.SoftmaxOutput(name='cls_prob', data=cls_score, label=label)
# bounding box regression
bbox_pred = mx.symbol.FullyConnected(name='bbox_pred', data=drop7, num_hidden=num_classes * 4)
- bbox_loss_ = bbox_loss_outside_weight * \
+ bbox_loss_ = bbox_outside_weight * \
mx.symbol.smooth_l1(name='bbox_loss_', scalar=1.0,
- data=bbox_loss_inside_weight * (bbox_pred - bbox_loss_target))
+ data=bbox_inside_weight * (bbox_pred - bbox_target))
bbox_loss = mx.sym.MakeLoss(name='bbox_loss', data=bbox_loss_)
+
+ # reshape output
+ cls_prob = mx.symbol.Reshape(data=cls_prob, shape=(config.TRAIN.BATCH_IMAGES, -1, num_classes), name='cls_prob_reshape')
+ bbox_loss = mx.symbol.Reshape(data=bbox_loss, shape=(config.TRAIN.BATCH_IMAGES, -1, 4 * num_classes), name='bbox_loss_reshape')
+
# group output
group = mx.symbol.Group([cls_prob, bbox_loss])
return group
-def get_symbol_vgg_test(num_classes=21):
+def get_vgg_rcnn_test(num_classes=21):
"""
- Fast R-CNN test with VGG 16 conv layers
+ Fast R-CNN Network with VGG
:param num_classes: used to determine output size
:return: Symbol
"""
data = mx.symbol.Variable(name="data")
rois = mx.symbol.Variable(name='rois')
- # group 1
- conv1_1 = mx.symbol.Convolution(
- data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
- relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
- conv1_2 = mx.symbol.Convolution(
- data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
- relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
- pool1 = mx.symbol.Pooling(
- data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
- # group 2
- conv2_1 = mx.symbol.Convolution(
- data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
- relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
- conv2_2 = mx.symbol.Convolution(
- data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
- relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
- pool2 = mx.symbol.Pooling(
- data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
- # group 3
- conv3_1 = mx.symbol.Convolution(
- data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
- relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
- conv3_2 = mx.symbol.Convolution(
- data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
- relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
- conv3_3 = mx.symbol.Convolution(
- data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
- relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
- pool3 = mx.symbol.Pooling(
- data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool3")
- # group 4
- conv4_1 = mx.symbol.Convolution(
- data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
- relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
- conv4_2 = mx.symbol.Convolution(
- data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
- relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
- conv4_3 = mx.symbol.Convolution(
- data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
- relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
- pool4 = mx.symbol.Pooling(
- data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
- # group 5
- conv5_1 = mx.symbol.Convolution(
- data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
- relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
- conv5_2 = mx.symbol.Convolution(
- data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
- relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
- conv5_3 = mx.symbol.Convolution(
- data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
- relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+
+ # reshape rois
+ rois = mx.symbol.Reshape(data=rois, shape=(-1, 5), name='rois_reshape')
+
+ # shared convolutional layer
+ relu5_3 = get_vgg_conv(data)
+
+ # Fast R-CNN
pool5 = mx.symbol.ROIPooling(
name='roi_pool5', data=relu5_3, rois=rois, pooled_size=(7, 7), spatial_scale=0.0625)
# group 6
@@ -166,6 +151,151 @@ def get_symbol_vgg_test(num_classes=21):
cls_prob = mx.symbol.SoftmaxOutput(name='cls_prob', data=cls_score)
# bounding box regression
bbox_pred = mx.symbol.FullyConnected(name='bbox_pred', data=drop7, num_hidden=num_classes * 4)
+
+ # reshape output
+ cls_prob = mx.symbol.Reshape(data=cls_prob, shape=(config.TEST.BATCH_IMAGES, -1, num_classes), name='cls_prob_reshape')
+ bbox_pred = mx.symbol.Reshape(data=bbox_pred, shape=(config.TEST.BATCH_IMAGES, -1, 4 * num_classes), name='bbox_pred_reshape')
+
# group output
group = mx.symbol.Group([cls_prob, bbox_pred])
return group
+
+
+def get_vgg_rpn(num_classes=21, num_anchors=9):
+ """
+ Region Proposal Network with VGG
+ :param num_classes: used to determine output size
+ :param num_anchors: used to determine output size
+ :return: Symbol
+ """
+ data = mx.symbol.Variable(name="data")
+ label = mx.symbol.Variable(name='label')
+ bbox_target = mx.symbol.Variable(name='bbox_target')
+ bbox_inside_weight = mx.symbol.Variable(name='bbox_inside_weight')
+ bbox_outside_weight = mx.symbol.Variable(name='bbox_outside_weight')
+
+ # shared convolutional layers
+ relu5_3 = get_vgg_conv(data)
+
+ # RPN
+ rpn_conv = mx.symbol.Convolution(
+ data=relu5_3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3")
+ rpn_relu = mx.symbol.Activation(data=rpn_conv, act_type="relu", name="rpn_relu")
+ rpn_cls_score = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=2 * num_anchors, name="rpn_cls_score")
+ rpn_bbox_pred = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=4 * num_anchors, name="rpn_bbox_pred")
+
+ # prepare rpn data
+ rpn_cls_score_reshape = mx.symbol.Reshape(
+ data=rpn_cls_score, shape=(0, 2, -1), name="rpn_cls_score_reshape")
+
+ # classification
+ cls_prob = mx.symbol.SoftmaxOutput(data=rpn_cls_score_reshape, label=label, multi_output=True,
+ normalization='valid', use_ignore=True, ignore_label=-1, name="cls_prob")
+ # bounding box regression
+ bbox_loss_ = bbox_outside_weight * \
+ mx.symbol.smooth_l1(name='bbox_loss_', scalar=3.0,
+ data=bbox_inside_weight * (rpn_bbox_pred - bbox_target))
+ bbox_loss = mx.sym.MakeLoss(name='bbox_loss', data=bbox_loss_)
+ # group output
+ group = mx.symbol.Group([cls_prob, bbox_loss])
+ return group
+
+
+def get_vgg_rpn_test(num_classes=21, num_anchors=9):
+ """
+ Region Proposal Network with VGG
+ :param num_classes: used to determine output size
+ :param num_anchors: used to determine output size
+ :return: Symbol
+ """
+ data = mx.symbol.Variable(name="data")
+ im_info = mx.symbol.Variable(name="im_info")
+
+ # shared convolutional layers
+ relu5_3 = get_vgg_conv(data)
+
+ # RPN
+ rpn_conv = mx.symbol.Convolution(
+ data=relu5_3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3")
+ rpn_relu = mx.symbol.Activation(data=rpn_conv, act_type="relu", name="rpn_relu")
+ rpn_cls_score = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=2 * num_anchors, name="rpn_cls_score")
+ rpn_bbox_pred = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=4 * num_anchors, name="rpn_bbox_pred")
+
+ # ROI Proposal
+ rpn_cls_score_reshape = mx.symbol.Reshape(
+ data=rpn_cls_score, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
+ rpn_cls_prob = mx.symbol.SoftmaxActivation(
+ data=rpn_cls_score_reshape, mode="channel", name="rpn_cls_prob")
+ rpn_cls_prob_reshape = mx.symbol.Reshape(
+ data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape')
+ group = mx.symbol.Custom(
+ cls_prob=rpn_cls_prob_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
+ op_type='proposal', feat_stride=16, scales=(8, 16, 32), ratios=(0.5, 1, 2), output_score=True)
+ # rois = group[0]
+ # score = group[1]
+
+ return group
+
+
+def get_vgg_test(num_classes=21, num_anchors=9):
+ """
+ Faster R-CNN test with VGG 16 conv layers
+ :param num_classes: used to determine output size
+ :param num_anchors: used to determine output size
+ :return: Symbol
+ """
+ data = mx.symbol.Variable(name="data")
+ im_info = mx.symbol.Variable(name="im_info")
+
+ # shared convolutional layers
+ relu5_3 = get_vgg_conv(data)
+
+ # RPN
+ rpn_conv = mx.symbol.Convolution(
+ data=relu5_3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="rpn_conv_3x3")
+ rpn_relu = mx.symbol.Activation(data=rpn_conv, act_type="relu", name="rpn_relu")
+ rpn_cls_score = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=2 * num_anchors, name="rpn_cls_score")
+ rpn_bbox_pred = mx.symbol.Convolution(
+ data=rpn_relu, kernel=(1, 1), pad=(0, 0), num_filter=4 * num_anchors, name="rpn_bbox_pred")
+
+ # ROI Proposal
+ rpn_cls_score_reshape = mx.symbol.Reshape(
+ data=rpn_cls_score, shape=(0, 2, -1, 0), name="rpn_cls_score_reshape")
+ rpn_cls_prob = mx.symbol.SoftmaxActivation(
+ data=rpn_cls_score_reshape, mode="channel", name="rpn_cls_prob")
+ rpn_cls_prob_reshape = mx.symbol.Reshape(
+ data=rpn_cls_prob, shape=(0, 2 * num_anchors, -1, 0), name='rpn_cls_prob_reshape')
+ rois = mx.symbol.Custom(
+ cls_prob=rpn_cls_prob_reshape, bbox_pred=rpn_bbox_pred, im_info=im_info, name='rois',
+ op_type='proposal', feat_stride=16, scales=(8, 16, 32), ratios=(0.5, 1, 2))
+
+ # Fast R-CNN
+ pool5 = mx.symbol.ROIPooling(
+ name='roi_pool5', data=relu5_3, rois=rois, pooled_size=(7, 7), spatial_scale=0.0625)
+ # group 6
+ flatten = mx.symbol.Flatten(data=pool5, name="flatten")
+ fc6 = mx.symbol.FullyConnected(data=flatten, num_hidden=4096, name="fc6")
+ relu6 = mx.symbol.Activation(data=fc6, act_type="relu", name="relu6")
+ drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+ # group 7
+ fc7 = mx.symbol.FullyConnected(data=drop6, num_hidden=4096, name="fc7")
+ relu7 = mx.symbol.Activation(data=fc7, act_type="relu", name="relu7")
+ drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+ # classification
+ cls_score = mx.symbol.FullyConnected(name='cls_score', data=drop7, num_hidden=num_classes)
+ cls_prob = mx.symbol.SoftmaxOutput(name='cls_prob', data=cls_score)
+ # bounding box regression
+ bbox_pred = mx.symbol.FullyConnected(name='bbox_pred', data=drop7, num_hidden=num_classes * 4)
+
+ # reshape output
+ cls_prob = mx.symbol.Reshape(data=cls_prob, shape=(config.TEST.BATCH_IMAGES, -1, num_classes), name='cls_prob_reshape')
+ bbox_pred = mx.symbol.Reshape(data=bbox_pred, shape=(config.TEST.BATCH_IMAGES, -1, 4 * num_classes), name='bbox_pred_reshape')
+
+ # group output
+ group = mx.symbol.Group([rois, cls_prob, bbox_pred])
+ return group
diff --git a/example/rcnn/rcnn/tester.py b/example/rcnn/rcnn/tester.py
index 3f69daa6f822..0dc253e3878b 100644
--- a/example/rcnn/rcnn/tester.py
+++ b/example/rcnn/rcnn/tester.py
@@ -19,7 +19,7 @@ def pred_eval(detector, test_data, imdb, vis=False):
"""
assert not test_data.shuffle
- thresh = 0.1
+ thresh = 0.05
# limit detections to max_per_image over all classes
max_per_image = 100
@@ -35,15 +35,17 @@ def pred_eval(detector, test_data, imdb, vis=False):
if i % 10 == 0:
print 'testing {}/{}'.format(i, imdb.num_images)
- scores, boxes = detector.im_detect(databatch.data['data'], databatch.data['rois'])
-
- # we used scaled image & roi to train, so it is necessary to transform them back
- # visualization should also be from the original size
- im_path = imdb.image_path_from_index(imdb.image_set_index[i])
- im = cv2.imread(im_path)
- im_height = im.shape[0]
- scale = float(databatch.data['data'].shape[2]) / float(im_height)
- im = image_processing.transform(im, config.PIXEL_MEANS)
+ if config.TEST.HAS_RPN:
+ scores, boxes = detector.im_detect(databatch.data['data'], im_info=databatch.data['im_info'])
+ scale = databatch.data['im_info'][0, 2]
+ else:
+ scores, boxes = detector.im_detect(databatch.data['data'], roi_array=databatch.data['rois'])
+ # we used scaled image & roi to train, so it is necessary to transform them back
+ # visualization should also be from the original size
+ im_path = imdb.image_path_from_index(imdb.image_set_index[i])
+ im = cv2.imread(im_path)
+ im_height = im.shape[0]
+ scale = float(databatch.data['data'].shape[2]) / float(im_height)
for j in range(1, imdb.num_classes):
indexes = np.where(scores[:, j] > thresh)[0]
@@ -64,7 +66,11 @@ def pred_eval(detector, test_data, imdb, vis=False):
boxes_this_image = [[]] + [all_boxes[j][i] for j in range(1, imdb.num_classes)]
if vis:
- vis_all_detection(im, boxes_this_image,
+ # visualize the testing scale
+ for box in boxes_this_image:
+ if isinstance(box, np.ndarray):
+ box[:, :4] *= scale
+ vis_all_detection(databatch.data['data'], boxes_this_image,
imdb_classes=imdb.classes)
i += 1
@@ -78,7 +84,7 @@ def pred_eval(detector, test_data, imdb, vis=False):
imdb.evaluate_detections(all_boxes)
-def vis_all_detection(im_array, detections, imdb_classes=None, thresh=0.):
+def vis_all_detection(im_array, detections, imdb_classes=None, thresh=0.7):
"""
visualize all detections in one image
:param im_array: [b=1 c h w] in rgb
@@ -101,8 +107,9 @@ def vis_all_detection(im_array, detections, imdb_classes=None, thresh=0.):
rect = plt.Rectangle((bbox[0], bbox[1]),
bbox[2] - bbox[0],
bbox[3] - bbox[1], fill=False,
- edgecolor=color, linewidth=2)
+ edgecolor=color, linewidth=3.5)
plt.gca().add_patch(rect)
- plt.gca().annotate('{} {:.3f}'.format(imdb_classes[j], score),
- rect.get_xy(), color='w')
+ plt.gca().text(bbox[0], bbox[1] - 2,
+ '{:s} {:.3f}'.format(imdb_classes[j], score),
+ bbox=dict(facecolor=color, alpha=0.5), fontsize=12, color='white')
plt.show()
diff --git a/example/rcnn/test.py b/example/rcnn/test.py
index be183c9ef7d1..74ffc40673c2 100644
--- a/example/rcnn/test.py
+++ b/example/rcnn/test.py
@@ -1,29 +1,12 @@
import argparse
-import mxnet as mx
import os
-from tools.test_net import test_net
+import mxnet as mx
-def parse_args():
- parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
- parser.add_argument('--image_set', dest='image_set', help='can be test',
- default='test', type=str)
- parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
- default='2007', type=str)
- parser.add_argument('--root_path', dest='root_path', help='output data folder',
- default=os.path.join(os.getcwd(), 'data'), type=str)
- parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
- default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
- parser.add_argument('--prefix', dest='prefix', help='new model prefix',
- default=os.path.join(os.getcwd(), 'model', 'frcnn'), type=str)
- parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
- default=9, type=int)
- parser.add_argument('--gpu', dest='gpu_id', help='GPU device to test with',
- default=0, type=int)
- args = parser.parse_args()
- return args
+from tools.test_rcnn import test_rcnn
+from tools.test_rcnn import parse_args
if __name__ == '__main__':
args = parse_args()
ctx = mx.gpu(args.gpu_id)
- test_net(args.image_set, args.year, args.root_path, args.devkit_path, args.prefix, args.epoch, ctx)
+ test_rcnn(args.image_set, args.year, args.root_path, args.devkit_path, args.prefix, args.epoch, ctx, args.vis, args.has_rpn)
diff --git a/example/rcnn/tools/demo_net.py b/example/rcnn/tools/demo_net.py
deleted file mode 100644
index 4e9cdf7cd3eb..000000000000
--- a/example/rcnn/tools/demo_net.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import numpy as np
-import cv2
-import scipy.io as sio
-from helper.processing.image_processing import resize, transform
-from rcnn.config import config
-from helper.processing.nms import nms
-from rcnn.tester import vis_all_detection
-
-CLASSES = ('__background__',
- 'aeroplane', 'bicycle', 'bird', 'boat',
- 'bottle', 'bus', 'car', 'cat', 'chair',
- 'cow', 'diningtable', 'dog', 'horse',
- 'motorbike', 'person', 'pottedplant',
- 'sheep', 'sofa', 'train', 'tvmonitor')
-
-
-def demo_net(detector, image_name):
- """
- wrapper for detector
- :param detector: Detector
- :param image_name: image name
- :return: None
- """
- # load demo data
- im = cv2.imread(image_name + '.jpg')
- im_array, im_scale = resize(im, config.TEST.SCALES[0], config.TRAIN.MAX_SIZE)
- im_array = transform(im_array, config.PIXEL_MEANS)
- roi_array = sio.loadmat(image_name + '_boxes.mat')['boxes']
- batch_index_array = np.zeros((roi_array.shape[0], 1))
- projected_rois = roi_array * im_scale
- roi_array = np.hstack((batch_index_array, projected_rois))
-
- scores, boxes = detector.im_detect(im_array, roi_array)
-
- all_boxes = [[] for _ in CLASSES]
- CONF_THRESH = 0.8
- NMS_THRESH = 0.3
- for cls in CLASSES:
- cls_ind = CLASSES.index(cls)
- cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
- cls_scores = scores[:, cls_ind]
- keep = np.where(cls_scores >= CONF_THRESH)[0]
- cls_boxes = cls_boxes[keep, :]
- cls_scores = cls_scores[keep]
- dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
- keep = nms(dets, NMS_THRESH)
- all_boxes[cls_ind] = dets[keep, :]
-
- boxes_this_image = [[]] + [all_boxes[j] for j in range(1, len(CLASSES))]
- vis_all_detection(im_array, boxes_this_image, CLASSES, 0)
diff --git a/example/rcnn/tools/load_data.py b/example/rcnn/tools/load_data.py
deleted file mode 100644
index 2ecdb512cb13..000000000000
--- a/example/rcnn/tools/load_data.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from helper.dataset.pascal_voc import PascalVOC
-from helper.processing.roidb import prepare_roidb, add_bbox_regression_targets
-
-
-def load_train_roidb(image_set, year, root_path, devkit_path, flip=False):
- voc = PascalVOC(image_set, year, root_path, devkit_path)
- gt_roidb = voc.gt_roidb()
- ss_roidb = voc.selective_search_roidb(gt_roidb)
- if flip:
- ss_roidb = voc.append_flipped_images(ss_roidb)
- prepare_roidb(voc, ss_roidb)
- means, stds = add_bbox_regression_targets(ss_roidb)
- return voc, ss_roidb, means, stds
-
-
-def load_test_roidb(image_set, year, root_path, devkit_path):
- voc = PascalVOC(image_set, year, root_path, devkit_path)
- gt_roidb = voc.gt_roidb()
- ss_roidb = voc.selective_search_roidb(gt_roidb)
- prepare_roidb(voc, ss_roidb)
- return voc, ss_roidb
diff --git a/example/rcnn/tools/test_net.py b/example/rcnn/tools/test_net.py
deleted file mode 100644
index 1c0a763d24d3..000000000000
--- a/example/rcnn/tools/test_net.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import logging
-from load_data import load_test_roidb
-from rcnn.data_iter import ROIIter
-from rcnn.symbol import get_symbol_vgg_test
-from load_model import load_param
-from rcnn.detector import Detector
-from rcnn.tester import pred_eval
-
-
-def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx):
- """
- wrapper for detector
- :param imageset: image set to test on
- :param year: year of image set
- :param root_path: 'data' folder path
- :param devkit_path: 'VOCdevkit' folder path
- :param prefix: new model prefix
- :param epoch: new model epoch
- :param ctx: context to evaluate in
- :return: None
- """
- # set up logger
- logger = logging.getLogger()
- logger.setLevel(logging.INFO)
-
- # load testing data
- voc, roidb = load_test_roidb(imageset, year, root_path, devkit_path)
- test_data = ROIIter(roidb, ctx=ctx, batch_size=1, shuffle=False, mode='test')
-
- # load model
- args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
-
- # load symbol
- sym = get_symbol_vgg_test()
-
- # detect
- detector = Detector(sym, ctx, args, auxs)
- pred_eval(detector, test_data, voc, vis=False)
diff --git a/example/rcnn/tools/test_rcnn.py b/example/rcnn/tools/test_rcnn.py
new file mode 100644
index 000000000000..fdbc92c97acf
--- /dev/null
+++ b/example/rcnn/tools/test_rcnn.py
@@ -0,0 +1,65 @@
+import argparse
+import os
+
+import mxnet as mx
+
+from rcnn.config import config
+from rcnn.loader import ROIIter
+from rcnn.detector import Detector
+from rcnn.symbol import get_vgg_test, get_vgg_rcnn_test
+from rcnn.tester import pred_eval
+from utils.load_data import load_gt_roidb, load_test_ss_roidb, load_test_rpn_roidb
+from utils.load_model import load_param
+
+
+def test_rcnn(imageset, year, root_path, devkit_path, prefix, epoch, ctx, vis=False, has_rpn=True, proposal='rpn'):
+ # load symbol and testing data
+ if has_rpn:
+ sym = get_vgg_test()
+ config.TEST.HAS_RPN = True
+ config.TEST.RPN_PRE_NMS_TOP_N = 6000
+ config.TEST.RPN_POST_NMS_TOP_N = 300
+ voc, roidb = load_gt_roidb(imageset, year, root_path, devkit_path)
+ else:
+ sym = get_vgg_rcnn_test()
+ voc, roidb = eval('load_test_' + proposal + '_roidb')(imageset, year, root_path, devkit_path)
+
+ # get test data iter
+ test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
+
+ # load model
+ args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
+
+ # detect
+ detector = Detector(sym, ctx, args, auxs)
+ pred_eval(detector, test_data, voc, vis=vis)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
+ parser.add_argument('--image_set', dest='image_set', help='can be test',
+ default='test', type=str)
+ parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
+ default='2007', type=str)
+ parser.add_argument('--root_path', dest='root_path', help='output data folder',
+ default=os.path.join(os.getcwd(), 'data'), type=str)
+ parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
+ default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
+ parser.add_argument('--prefix', dest='prefix', help='model to test with', type=str)
+ parser.add_argument('--epoch', dest='epoch', help='model to test with',
+ default=8, type=int)
+ parser.add_argument('--gpu', dest='gpu_id', help='GPU device to test with',
+ default=0, type=int)
+ parser.add_argument('--vis', dest='vis', help='turn on visualization', action='store_true')
+ parser.add_argument('--has_rpn', dest='has_rpn', help='generate proposals on the fly',
+ action='store_true')
+ parser.add_argument('--proposal', dest='proposal', help='can be ss for selective search or rpn',
+ default='rpn', type=str)
+ args = parser.parse_args()
+ return args
+
+if __name__ == '__main__':
+ args = parse_args()
+ ctx = mx.gpu(args.gpu_id)
+ test_rcnn(args.image_set, args.year, args.root_path, args.devkit_path, args.prefix, args.epoch, ctx, args.vis,
+ args.has_rpn, args.proposal)
diff --git a/example/rcnn/tools/test_rpn.py b/example/rcnn/tools/test_rpn.py
new file mode 100644
index 000000000000..b93c1753a42c
--- /dev/null
+++ b/example/rcnn/tools/test_rpn.py
@@ -0,0 +1,58 @@
+import argparse
+import os
+
+import mxnet as mx
+
+from rcnn.config import config
+from rcnn.loader import ROIIter
+from rcnn.rpn.generate import Detector, generate_detections
+from rcnn.symbol import get_vgg_rpn_test
+from utils.load_data import load_gt_roidb
+from utils.load_model import load_param
+
+# rpn generate proposal config
+config.TEST.HAS_RPN = True
+config.TEST.RPN_PRE_NMS_TOP_N = -1
+config.TEST.RPN_POST_NMS_TOP_N = 2000
+
+
+def test_rpn(image_set, year, root_path, devkit_path, prefix, epoch, ctx, vis=False):
+ # load symbol
+ sym = get_vgg_rpn_test()
+
+ # load testing data
+ voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path)
+ test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
+
+ # load model
+ args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
+
+ # start testing
+ detector = Detector(sym, ctx, args, auxs)
+ imdb_boxes = generate_detections(detector, test_data, voc, vis=vis)
+ voc.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Test a Region Proposal Network')
+ parser.add_argument('--image_set', dest='image_set', help='can be trainval or train',
+ default='trainval', type=str)
+ parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
+ default='2007', type=str)
+ parser.add_argument('--root_path', dest='root_path', help='output data folder',
+ default=os.path.join(os.getcwd(), 'data'), type=str)
+ parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
+ default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
+ parser.add_argument('--prefix', dest='prefix', help='model to test with', type=str)
+ parser.add_argument('--epoch', dest='epoch', help='model to test with',
+ default=8, type=int)
+ parser.add_argument('--gpu', dest='gpu_id', help='GPU device to train with',
+ default=0, type=int)
+ parser.add_argument('--vis', dest='vis', help='turn on visualization', action='store_true')
+ args = parser.parse_args()
+ return args
+
+if __name__ == '__main__':
+ args = parse_args()
+ ctx = mx.gpu(args.gpu_id)
+ test_rpn(args.image_set, args.year, args.root_path, args.devkit_path, args.prefix, args.epoch, ctx, args.vis)
diff --git a/example/rcnn/tools/train_net.py b/example/rcnn/tools/train_net.py
deleted file mode 100644
index ad4552b54af5..000000000000
--- a/example/rcnn/tools/train_net.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import mxnet as mx
-import logging
-from rcnn.config import config
-from load_data import load_train_roidb
-from rcnn.data_iter import ROIIter
-from rcnn.symbol import get_symbol_vgg
-from load_model import load_checkpoint, load_param
-from rcnn.solver import Solver
-from save_model import save_checkpoint
-
-
-def train_net(image_set, year, root_path, devkit_path, pretrained, epoch,
- prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None):
- """
- wrapper for solver
- :param image_set: image set to train on
- :param year: year of image set
- :param root_path: 'data' folder
- :param devkit_path: 'VOCdevkit' folder
- :param pretrained: prefix of pretrained model
- :param epoch: epoch of pretrained model
- :param prefix: prefix of new model
- :param ctx: context to train in
- :param begin_epoch: begin epoch number
- :param end_epoch: end epoch number
- :param frequent: frequency to print
- :return: None
- """
- # set up logger
- logger = logging.getLogger()
- logger.setLevel(logging.INFO)
-
- # load training data
- voc, roidb, means, stds = load_train_roidb(image_set, year, root_path, devkit_path, flip=True)
- train_data = ROIIter(roidb, ctx=ctx, batch_size=config.TRAIN.BATCH_IMAGES, shuffle=True, mode='train', work_load_list=work_load_list)
-
- # load pretrained
- args, auxs = load_param(pretrained, epoch, convert=True, ctx=ctx[0])
- del args['fc8_bias']
- del args['fc8_weight']
-
- # load symbol
- sym = get_symbol_vgg()
-
- # initialize params
- arg_shape, _, _ = sym.infer_shape(data=(1, 3, 224, 224), rois=(1, 5))
- arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
- args['cls_score_weight'] = mx.random.normal(mean=0, stdvar=0.01, shape=arg_shape_dict['cls_score_weight'], ctx=ctx[0])
- args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'], ctx=ctx[0])
- args['bbox_pred_weight'] = mx.random.normal(mean=0, stdvar=0.001, shape=arg_shape_dict['bbox_pred_weight'], ctx=ctx[0])
- args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'], ctx=ctx[0])
-
- # train
- solver = Solver(prefix, sym, ctx, begin_epoch, end_epoch, kv_store, args, auxs, momentum=0.9, wd=0.0005,
- learning_rate=0.001, lr_scheduler=mx.lr_scheduler.FactorScheduler(30000, 0.1), max_data_shape=[('data', (1, 3, 1000, 1000))])
- solver.fit(train_data, frequent=frequent)
-
- # edit params and save
- for epoch in range(begin_epoch + 1, end_epoch + 1):
- arg_params, aux_params = load_checkpoint(prefix, epoch)
- arg_params['bbox_pred_weight'] = (arg_params['bbox_pred_weight'].T * mx.nd.array(stds, ctx=ctx[0])).T
- arg_params['bbox_pred_bias'] = arg_params['bbox_pred_bias'] * mx.nd.array(stds, ctx=ctx[0]) + \
- mx.nd.array(means, ctx=ctx[0])
- save_checkpoint(prefix, epoch, arg_params, aux_params)
diff --git a/example/rcnn/tools/train_rcnn.py b/example/rcnn/tools/train_rcnn.py
new file mode 100644
index 000000000000..432c6a950a79
--- /dev/null
+++ b/example/rcnn/tools/train_rcnn.py
@@ -0,0 +1,138 @@
+import argparse
+import logging
+import os
+
+import mxnet as mx
+
+from rcnn.callback import Speedometer
+from rcnn.config import config
+from rcnn.loader import ROIIter
+from rcnn.metric import AccuracyMetric, LogLossMetric, SmoothL1LossMetric
+from rcnn.module import MutableModule
+from rcnn.symbol import get_vgg_rcnn
+from utils.load_data import load_ss_roidb, load_rpn_roidb
+from utils.load_model import load_checkpoint, load_param
+from utils.save_model import save_checkpoint
+
+
+def train_rcnn(image_set, year, root_path, devkit_path, pretrained, epoch,
+ prefix, ctx, begin_epoch, end_epoch, frequent, kv_store,
+ work_load_list=None, resume=False, proposal='rpn'):
+ # set up logger
+ logger = logging.getLogger()
+ logger.setLevel(logging.INFO)
+
+ # load symbol
+ sym = get_vgg_rcnn()
+
+ # setup multi-gpu
+ config.TRAIN.BATCH_IMAGES *= len(ctx)
+ config.TRAIN.BATCH_SIZE *= len(ctx)
+
+ # load training data
+ voc, roidb, means, stds = eval('load_' + proposal + '_roidb')(image_set, year, root_path, devkit_path, flip=True)
+ train_data = ROIIter(roidb, batch_size=config.TRAIN.BATCH_IMAGES, shuffle=True, mode='train',
+ ctx=ctx, work_load_list=work_load_list)
+
+ # infer max shape
+ max_data_shape = [('data', (config.TRAIN.BATCH_IMAGES, 3, 1000, 1000))]
+
+ # load pretrained
+ args, auxs = load_param(pretrained, epoch, convert=True)
+
+ # initialize params
+ if not resume:
+ input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
+ arg_shape, _, _ = sym.infer_shape(**input_shapes)
+ arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
+ args['cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['cls_score_weight'])
+ args['cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['cls_score_bias'])
+ args['bbox_pred_weight'] = mx.random.normal(0, 0.001, shape=arg_shape_dict['bbox_pred_weight'])
+ args['bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['bbox_pred_bias'])
+
+ # prepare training
+ if config.TRAIN.FINETUNE:
+ fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
+ else:
+ fixed_param_prefix = ['conv1', 'conv2']
+ data_names = [k[0] for k in train_data.provide_data]
+ label_names = [k[0] for k in train_data.provide_label]
+ batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
+ epoch_end_callback = mx.callback.do_checkpoint(prefix)
+ if config.TRAIN.HAS_RPN is True:
+ eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
+ cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
+ else:
+ eval_metric = AccuracyMetric()
+ cls_metric = LogLossMetric()
+ bbox_metric = SmoothL1LossMetric()
+ eval_metrics = mx.metric.CompositeEvalMetric()
+ for child_metric in [eval_metric, cls_metric, bbox_metric]:
+ eval_metrics.add(child_metric)
+ optimizer_params = {'momentum': 0.9,
+ 'wd': 0.0005,
+ 'learning_rate': 0.001,
+ 'lr_scheduler': mx.lr_scheduler.FactorScheduler(30000, 0.1),
+ 'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}
+
+ # train
+ mod = MutableModule(sym, data_names=data_names, label_names=label_names,
+ logger=logger, context=ctx, work_load_list=work_load_list,
+ max_data_shapes=max_data_shape, fixed_param_prefix=fixed_param_prefix)
+ mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
+ batch_end_callback=batch_end_callback, kvstore=kv_store,
+ optimizer='sgd', optimizer_params=optimizer_params,
+ arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
+
+ # edit params and save
+ for epoch in range(begin_epoch + 1, end_epoch + 1):
+ arg_params, aux_params = load_checkpoint(prefix, epoch)
+ arg_params['bbox_pred_weight'] = (arg_params['bbox_pred_weight'].T * mx.nd.array(stds)).T
+ arg_params['bbox_pred_bias'] = arg_params['bbox_pred_bias'] * mx.nd.array(stds) + \
+ mx.nd.array(means)
+ save_checkpoint(prefix, epoch, arg_params, aux_params)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a Fast R-CNN Network')
+ parser.add_argument('--image_set', dest='image_set', help='can be trainval or train',
+ default='trainval', type=str)
+ parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
+ default='2007', type=str)
+ parser.add_argument('--root_path', dest='root_path', help='output data folder',
+ default=os.path.join(os.getcwd(), 'data'), type=str)
+ parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
+ default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
+ parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
+ default=os.path.join(os.getcwd(), 'model', 'vgg16'), type=str)
+ parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
+ default=1, type=int)
+ parser.add_argument('--prefix', dest='prefix', help='new model prefix',
+ default=os.path.join(os.getcwd(), 'model', 'rcnn'), type=str)
+ parser.add_argument('--gpus', dest='gpu_ids', help='GPU device to train with',
+ default='0', type=str)
+ parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
+ default=0, type=int)
+ parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
+ default=8, type=int)
+ parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
+ default=20, type=int)
+ parser.add_argument('--kv_store', dest='kv_store', help='the kv-store type',
+ default='device', type=str)
+ parser.add_argument('--work_load_list', dest='work_load_list', help='work load for different devices',
+ default=None, type=list)
+ parser.add_argument('--finetune', dest='finetune', help='second round finetune', action='store_true')
+ parser.add_argument('--resume', dest='resume', help='continue training', action='store_true')
+ parser.add_argument('--proposal', dest='proposal', help='can be ss for selective search or rpn',
+ default='rpn', type=str)
+ args = parser.parse_args()
+ return args
+
+if __name__ == '__main__':
+ args = parse_args()
+ ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
+ if args.finetune:
+ config.TRAIN.FINETUNE = True
+ train_rcnn(args.image_set, args.year, args.root_path, args.devkit_path, args.pretrained, args.epoch,
+ args.prefix, ctx, args.begin_epoch, args.end_epoch, args.frequent,
+ args.kv_store, args.work_load_list, args.resume, args.proposal)
diff --git a/example/rcnn/tools/train_rpn.py b/example/rcnn/tools/train_rpn.py
new file mode 100644
index 000000000000..1b3f489b490b
--- /dev/null
+++ b/example/rcnn/tools/train_rpn.py
@@ -0,0 +1,144 @@
+import argparse
+import logging
+import os
+
+import mxnet as mx
+
+from rcnn.callback import Speedometer
+from rcnn.config import config
+from rcnn.loader import AnchorLoader
+from rcnn.metric import AccuracyMetric, LogLossMetric, SmoothL1LossMetric
+from rcnn.module import MutableModule
+from rcnn.symbol import get_vgg_rpn
+from utils.load_data import load_gt_roidb
+from utils.load_model import load_param
+
+# rpn config
+config.TRAIN.HAS_RPN = True
+config.TRAIN.BATCH_SIZE = 1
+
+
+def train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
+ prefix, ctx, begin_epoch, end_epoch, frequent, kv_store, work_load_list=None, resume=False):
+ # set up logger
+ logger = logging.getLogger()
+ logger.setLevel(logging.INFO)
+
+ # load symbol
+ sym = get_vgg_rpn()
+ feat_sym = get_vgg_rpn().get_internals()['rpn_cls_score_output']
+
+ # setup multi-gpu
+ config.TRAIN.BATCH_IMAGES *= len(ctx)
+ config.TRAIN.BATCH_SIZE *= len(ctx)
+
+ # load training data
+ voc, roidb = load_gt_roidb(image_set, year, root_path, devkit_path, flip=True)
+ train_data = AnchorLoader(feat_sym, roidb, batch_size=config.TRAIN.BATCH_SIZE, shuffle=True, mode='train',
+ ctx=ctx, work_load_list=work_load_list)
+
+ # infer max shape
+ max_data_shape = [('data', (config.TRAIN.BATCH_SIZE, 3, 1000, 1000))]
+ max_data_shape_dict = {k: v for k, v in max_data_shape}
+ _, feat_shape, _ = feat_sym.infer_shape(**max_data_shape_dict)
+ from rcnn.minibatch import assign_anchor
+ import numpy as np
+ label = assign_anchor(feat_shape[0], np.zeros((0, 5)), [[1000, 1000, 1.0]])
+ max_label_shape = [('label', label['label'].shape),
+ ('bbox_target', label['bbox_target'].shape),
+ ('bbox_inside_weight', label['bbox_inside_weight'].shape),
+ ('bbox_outside_weight', label['bbox_outside_weight'].shape)]
+ print 'providing maximum shape', max_data_shape, max_label_shape
+
+ # load pretrained
+ args, auxs = load_param(pretrained, epoch, convert=True)
+
+ # initialize params
+ if not resume:
+ input_shapes = {k: v for k, v in train_data.provide_data + train_data.provide_label}
+ arg_shape, _, _ = sym.infer_shape(**input_shapes)
+ arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
+ args['rpn_conv_3x3_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_conv_3x3_weight'])
+ args['rpn_conv_3x3_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_conv_3x3_bias'])
+ args['rpn_cls_score_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_cls_score_weight'])
+ args['rpn_cls_score_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_cls_score_bias'])
+ args['rpn_bbox_pred_weight'] = mx.random.normal(0, 0.01, shape=arg_shape_dict['rpn_bbox_pred_weight'])
+ args['rpn_bbox_pred_bias'] = mx.nd.zeros(shape=arg_shape_dict['rpn_bbox_pred_bias'])
+
+ # prepare training
+ if config.TRAIN.FINETUNE:
+ fixed_param_prefix = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5']
+ else:
+ fixed_param_prefix = ['conv1', 'conv2']
+ data_names = [k[0] for k in train_data.provide_data]
+ label_names = [k[0] for k in train_data.provide_label]
+ batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
+ epoch_end_callback = mx.callback.do_checkpoint(prefix)
+ if config.TRAIN.HAS_RPN is True:
+ eval_metric = AccuracyMetric(use_ignore=True, ignore=-1)
+ cls_metric = LogLossMetric(use_ignore=True, ignore=-1)
+ else:
+ eval_metric = AccuracyMetric()
+ cls_metric = LogLossMetric()
+ bbox_metric = SmoothL1LossMetric()
+ eval_metrics = mx.metric.CompositeEvalMetric()
+ for child_metric in [eval_metric, cls_metric, bbox_metric]:
+ eval_metrics.add(child_metric)
+ optimizer_params = {'momentum': 0.9,
+ 'wd': 0.0005,
+ 'learning_rate': 0.001,
+ 'lr_scheduler': mx.lr_scheduler.FactorScheduler(60000, 0.1),
+ 'rescale_grad': (1.0 / config.TRAIN.BATCH_SIZE)}
+
+ # train
+ mod = MutableModule(sym, data_names=data_names, label_names=label_names,
+ logger=logger, context=ctx, work_load_list=work_load_list,
+ max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
+ fixed_param_prefix=fixed_param_prefix)
+ mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
+ batch_end_callback=batch_end_callback, kvstore=kv_store,
+ optimizer='sgd', optimizer_params=optimizer_params,
+ arg_params=args, aux_params=auxs, begin_epoch=begin_epoch, num_epoch=end_epoch)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train a Region Proposal Network')
+ parser.add_argument('--image_set', dest='image_set', help='can be trainval or train',
+ default='trainval', type=str)
+ parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
+ default='2007', type=str)
+ parser.add_argument('--root_path', dest='root_path', help='output data folder',
+ default=os.path.join(os.getcwd(), 'data'), type=str)
+ parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
+ default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
+ parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
+ default=os.path.join(os.getcwd(), 'model', 'vgg16'), type=str)
+ parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
+ default=1, type=int)
+ parser.add_argument('--prefix', dest='prefix', help='new model prefix',
+ default=os.path.join(os.getcwd(), 'model', 'rpn'), type=str)
+ parser.add_argument('--gpus', dest='gpu_ids', help='GPU device to train with',
+ default='0', type=str)
+ parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
+ default=0, type=int)
+ parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
+ default=8, type=int)
+ parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
+ default=20, type=int)
+ parser.add_argument('--kv_store', dest='kv_store', help='the kv-store type',
+ default='device', type=str)
+ parser.add_argument('--work_load_list', dest='work_load_list', help='work load for different devices',
+ default=None, type=list)
+ parser.add_argument('--finetune', dest='finetune', help='second round finetune', action='store_true')
+ parser.add_argument('--resume', dest='resume', help='continue training', action='store_true')
+ args = parser.parse_args()
+ return args
+
+if __name__ == '__main__':
+ args = parse_args()
+ ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
+ if args.finetune:
+ config.TRAIN.FINETUNE = True
+ train_rpn(args.image_set, args.year, args.root_path, args.devkit_path, args.pretrained, args.epoch,
+ args.prefix, ctx, args.begin_epoch, args.end_epoch, args.frequent,
+ args.kv_store, args.work_load_list, args.resume)
diff --git a/example/rcnn/train.py b/example/rcnn/train.py
deleted file mode 100644
index ad61855ae50f..000000000000
--- a/example/rcnn/train.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import argparse
-import mxnet as mx
-import os
-from tools.train_net import train_net
-
-
-def parse_args():
- parser = argparse.ArgumentParser(description='Train a Fast R-CNN network')
- parser.add_argument('--image_set', dest='image_set', help='can be trainval or train',
- default='trainval', type=str)
- parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
- default='2007', type=str)
- parser.add_argument('--root_path', dest='root_path', help='output data folder',
- default=os.path.join(os.getcwd(), 'data'), type=str)
- parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
- default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
- parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
- default=os.path.join(os.getcwd(), 'model', 'vgg16'), type=str)
- parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
- default=1, type=int)
- parser.add_argument('--prefix', dest='prefix', help='new model prefix',
- default=os.path.join(os.getcwd(), 'model', 'frcnn'), type=str)
- parser.add_argument('--gpus', dest='gpu_ids', help='GPU device to train with',
- default='0', type=str)
- parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
- default=0, type=int)
- parser.add_argument('--end_epoch', dest='end_epoch', help='end epoch of training',
- default=8, type=int)
- parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
- default=20, type=int)
- parser.add_argument('--kv_store', dest='kv_store', help='the kv-store type',
- default='local', type=str)
- parser.add_argument('--work_load_list', dest='work_load_list', help='work load for different devices',
- default=None, type=list)
- args = parser.parse_args()
- return args
-
-if __name__ == '__main__':
- args = parse_args()
- ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
- train_net(args.image_set, args.year, args.root_path, args.devkit_path, args.pretrained, args.epoch,
- args.prefix, ctx, args.begin_epoch, args.end_epoch, args.frequent, args.kv_store, args.work_load_list)
diff --git a/example/rcnn/train_alternate.py b/example/rcnn/train_alternate.py
new file mode 100644
index 000000000000..5e3ba7f07780
--- /dev/null
+++ b/example/rcnn/train_alternate.py
@@ -0,0 +1,104 @@
+import argparse
+import logging
+import os
+
+import mxnet as mx
+
+from rcnn.config import config
+from rcnn.loader import AnchorLoader, ROIIter
+from tools.train_rpn import train_rpn
+from tools.train_rcnn import train_rcnn
+from tools.test_rpn import test_rpn
+from utils.combine_model import combine_model
+
+
+def alternate_train(image_set, test_image_set, year, root_path, devkit_path, pretrained, epoch,
+ ctx, begin_epoch, rpn_epoch, rcnn_epoch, frequent, kv_store, work_load_list=None):
+ # set up logger
+ logger = logging.getLogger()
+ logger.setLevel(logging.INFO)
+ config.TRAIN.BG_THRESH_LO = 0.0
+
+ logging.info('########## TRAIN RPN WITH IMAGENET INIT')
+ config.TRAIN.HAS_RPN = True
+ config.TRAIN.BATCH_SIZE = 1
+ train_rpn(image_set, year, root_path, devkit_path, pretrained, epoch,
+ 'model/rpn1', ctx, begin_epoch, rpn_epoch, frequent, kv_store, work_load_list)
+
+ logging.info('########## GENERATE RPN DETECTION')
+ config.TEST.HAS_RPN = True
+ config.TEST.RPN_PRE_NMS_TOP_N = -1
+ config.TEST.RPN_POST_NMS_TOP_N = 2000
+ test_rpn(image_set, year, root_path, devkit_path, 'model/rpn1', rpn_epoch, ctx[0])
+
+ logging.info('########## TRAIN RCNN WITH IMAGENET INIT AND RPN DETECTION')
+ config.TRAIN.HAS_RPN = False
+ config.TRAIN.BATCH_SIZE = 128
+ train_rcnn(image_set, year, root_path, devkit_path, pretrained, epoch,
+ 'model/rcnn1', ctx, begin_epoch, rcnn_epoch, frequent, kv_store, work_load_list)
+
+ logging.info('########## TRAIN RPN WITH RCNN INIT')
+ config.TRAIN.HAS_RPN = True
+ config.TRAIN.BATCH_SIZE = 1
+ config.TRAIN.FINETUNE = True
+ train_rpn(image_set, year, root_path, devkit_path, 'model/rcnn1', rcnn_epoch,
+ 'model/rpn2', ctx, begin_epoch, rpn_epoch, frequent, kv_store, work_load_list)
+
+ logging.info('########## GENERATE RPN DETECTION')
+ config.TEST.HAS_RPN = True
+ config.TEST.RPN_PRE_NMS_TOP_N = -1
+ config.TEST.RPN_POST_NMS_TOP_N = 2000
+ test_rpn(image_set, year, root_path, devkit_path, 'model/rpn2', rpn_epoch, ctx[0])
+
+ logger.info('########## COMBINE RPN2 WITH RCNN1')
+ combine_model('model/rpn2', rpn_epoch, 'model/rcnn1', rcnn_epoch, 'model/rcnn2', 0)
+
+ logger.info('########## TRAIN RCNN WITH RPN INIT AND DETECTION')
+ config.TRAIN.HAS_RPN = False
+ config.TRAIN.BATCH_SIZE = 128
+ train_rcnn(image_set, year, root_path, devkit_path, 'model/rcnn2', 0,
+ 'model/rcnn2', ctx, begin_epoch, rcnn_epoch, frequent, kv_store, work_load_list)
+
+ logger.info('########## COMBINE RPN2 WITH RCNN2')
+ combine_model('model/rpn2', rpn_epoch, 'model/rcnn2', rcnn_epoch, 'model/final', 0)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description='Train Faster R-CNN Network')
+ parser.add_argument('--image_set', dest='image_set', help='can be trainval or train',
+ default='trainval', type=str)
+ parser.add_argument('--test_image_set', dest='test_image_set', help='can be test or val',
+ default='test', type=str)
+ parser.add_argument('--year', dest='year', help='can be 2007, 2010, 2012',
+ default='2007', type=str)
+ parser.add_argument('--root_path', dest='root_path', help='output data folder',
+ default=os.path.join(os.getcwd(), 'data'), type=str)
+ parser.add_argument('--devkit_path', dest='devkit_path', help='VOCdevkit path',
+ default=os.path.join(os.getcwd(), 'data', 'VOCdevkit'), type=str)
+ parser.add_argument('--pretrained', dest='pretrained', help='pretrained model prefix',
+ default=os.path.join(os.getcwd(), 'model', 'vgg16'), type=str)
+ parser.add_argument('--epoch', dest='epoch', help='epoch of pretrained model',
+ default=1, type=int)
+ parser.add_argument('--gpus', dest='gpu_ids', help='GPU device to train with',
+ default='0', type=str)
+ parser.add_argument('--begin_epoch', dest='begin_epoch', help='begin epoch of training',
+ default=0, type=int)
+ parser.add_argument('--rpn_epoch', dest='rpn_epoch', help='end epoch of rpn training',
+ default=8, type=int)
+ parser.add_argument('--rcnn_epoch', dest='rcnn_epoch', help='end epoch of rcnn training',
+ default=8, type=int)
+ parser.add_argument('--frequent', dest='frequent', help='frequency of logging',
+ default=20, type=int)
+ parser.add_argument('--kv_store', dest='kv_store', help='the kv-store type',
+ default='device', type=str)
+ parser.add_argument('--work_load_list', dest='work_load_list', help='work load for different devices',
+ default=None, type=list)
+ args = parser.parse_args()
+ return args
+
+if __name__ == '__main__':
+ args = parse_args()
+ ctx = [mx.gpu(int(i)) for i in args.gpu_ids.split(',')]
+ alternate_train(args.image_set, args.test_image_set, args.year, args.root_path, args.devkit_path,
+ args.pretrained, args.epoch, ctx, args.begin_epoch, args.rpn_epoch, args.rcnn_epoch,
+ args.frequent, args.kv_store, args.work_load_list)
diff --git a/example/rcnn/utils/__init__.py b/example/rcnn/utils/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/example/rcnn/utils/caffe_convert.py b/example/rcnn/utils/caffe_convert.py
new file mode 100644
index 000000000000..4dfbfb4e186f
--- /dev/null
+++ b/example/rcnn/utils/caffe_convert.py
@@ -0,0 +1,74 @@
+# This script will not work unless all paths are set right
+
+import os
+import sys
+import mxnet as mx
+import numpy as np
+fast_rcnn_path = None
+sys.path.insert(0, os.path.join(fast_rcnn_path, 'caffe-fast-rcnn', 'python'))
+sys.path.insert(0, os.path.join(fast_rcnn_path, 'lib'))
+import caffe
+from rcnn.symbol import get_symbol_vgg_test
+
+def load_model(caffeproto, caffemodel, arg_shape_dic):
+ def get_caffe_iter(layer_names, layers):
+ for layer_idx, layer in enumerate(layers):
+ layer_name = layer_names[layer_idx].replace('/', '_')
+ layer_type = layer.type
+ layer_blobs = layer.blobs
+ yield (layer_name, layer_type, layer_blobs)
+
+ net_caffe = caffe.Net(caffeproto, caffemodel, caffe.TEST)
+ layer_names = net_caffe._layer_names
+ layers = net_caffe.layers
+ iter = ''
+ iter = get_caffe_iter(layer_names, layers)
+ first_conv = True
+
+ arg_params = {}
+ for layer_name, layer_type, layer_blobs in iter:
+ if layer_type == 'Convolution' or layer_type == 'InnerProduct' or layer_type == 4 or layer_type == 14:
+ assert(len(layer_blobs) == 2)
+ wmat = np.array(layer_blobs[0].data).reshape(layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height, layer_blobs[0].width)
+ bias = np.array(layer_blobs[1].data)
+ if first_conv:
+ print 'Swapping BGR of caffe into RGB in mxnet'
+ wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :]
+
+ assert(wmat.flags['C_CONTIGUOUS'] is True)
+ assert(bias.flags['C_CONTIGUOUS'] is True)
+ print 'converting layer {0}, wmat shape = {1}, bias shape = {2}'.format(layer_name, wmat.shape, bias.shape)
+ wmat = wmat.reshape((wmat.shape[0], -1))
+ bias = bias.reshape((bias.shape[0], 1))
+ weight_name = layer_name + "_weight"
+ bias_name = layer_name + "_bias"
+
+ if weight_name not in arg_shape_dic:
+ print weight_name + ' not found in arg_shape_dic.'
+ continue
+ wmat = wmat.reshape(arg_shape_dic[weight_name])
+ arg_params[weight_name] = mx.nd.zeros(wmat.shape)
+ arg_params[weight_name][:] = wmat
+
+ bias = bias.reshape(arg_shape_dic[bias_name])
+ arg_params[bias_name] = mx.nd.zeros(bias.shape)
+ arg_params[bias_name][:] = bias
+
+ if first_conv and (layer_type == 'Convolution' or layer_type == 4):
+ first_conv = False
+
+ return arg_params
+
+proto_path = os.path.join(fast_rcnn_path, 'models', 'VGG16', 'test.prototxt')
+model_path = os.path.join(fast_rcnn_path, 'data', 'fast_rcnn_models', 'vgg16_fast_rcnn_iter_40000.caffemodel')
+
+symbol = get_symbol_vgg_test()
+arg_shapes, out_shapes, aux_shapes = symbol.infer_shape(**{'data': (1, 3, 224, 224), 'rois': (1, 5)})
+arg_shape_dic = { name: shape for name, shape in zip(symbol.list_arguments(), arg_shapes) }
+
+arg_params = load_model(proto_path, model_path, arg_shape_dic)
+
+model = mx.model.FeedForward(ctx=mx.cpu(), symbol=symbol, arg_params=arg_params,
+ aux_params={}, num_epoch=1,
+ learning_rate=0.01, momentum=0.9, wd=0.0001)
+model.save('model/ref')
diff --git a/example/rcnn/utils/combine_model.py b/example/rcnn/utils/combine_model.py
new file mode 100644
index 000000000000..5518dda4a989
--- /dev/null
+++ b/example/rcnn/utils/combine_model.py
@@ -0,0 +1,22 @@
+from load_model import load_checkpoint
+from save_model import save_checkpoint
+
+
+def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out):
+ args1, auxs1 = load_checkpoint(prefix1, epoch1)
+ args2, auxs2 = load_checkpoint(prefix2, epoch2)
+ arg_names = args1.keys() + args2.keys()
+ aux_names = auxs1.keys() + auxs2.keys()
+ args = dict()
+ for arg in arg_names:
+ if arg in args1:
+ args[arg] = args1[arg]
+ else:
+ args[arg] = args2[arg]
+ auxs = dict()
+ for aux in aux_names:
+ if aux in auxs1:
+ auxs[aux] = auxs1[aux]
+ else:
+ auxs[aux] = auxs2[aux]
+ save_checkpoint(prefix_out, epoch_out, args, auxs)
diff --git a/example/rcnn/utils/load_data.py b/example/rcnn/utils/load_data.py
new file mode 100644
index 000000000000..cc6317e0e74a
--- /dev/null
+++ b/example/rcnn/utils/load_data.py
@@ -0,0 +1,49 @@
+from helper.dataset.pascal_voc import PascalVOC
+from helper.processing.roidb import prepare_roidb, add_bbox_regression_targets
+
+
+def load_ss_roidb(image_set, year, root_path, devkit_path, flip=False):
+ voc = PascalVOC(image_set, year, root_path, devkit_path)
+ gt_roidb = voc.gt_roidb()
+ ss_roidb = voc.selective_search_roidb(gt_roidb)
+ if flip:
+ ss_roidb = voc.append_flipped_images(ss_roidb)
+ prepare_roidb(voc, ss_roidb)
+ means, stds = add_bbox_regression_targets(ss_roidb)
+ return voc, ss_roidb, means, stds
+
+
+def load_gt_roidb(image_set, year, root_path, devkit_path, flip=False):
+ voc = PascalVOC(image_set, year, root_path, devkit_path)
+ gt_roidb = voc.gt_roidb()
+ if flip:
+ gt_roidb = voc.append_flipped_images(gt_roidb)
+ prepare_roidb(voc, gt_roidb)
+ return voc, gt_roidb
+
+
+def load_rpn_roidb(image_set, year, root_path, devkit_path, flip=False):
+ voc = PascalVOC(image_set, year, root_path, devkit_path)
+ gt_roidb = voc.gt_roidb()
+ rpn_roidb = voc.rpn_roidb(gt_roidb)
+ if flip:
+ rpn_roidb = voc.append_flipped_images(rpn_roidb)
+ prepare_roidb(voc, rpn_roidb)
+ means, stds = add_bbox_regression_targets(rpn_roidb)
+ return voc, rpn_roidb, means, stds
+
+
+def load_test_ss_roidb(image_set, year, root_path, devkit_path):
+ voc = PascalVOC(image_set, year, root_path, devkit_path)
+ gt_roidb = voc.gt_roidb()
+ ss_roidb = voc.selective_search_roidb(gt_roidb)
+ prepare_roidb(voc, ss_roidb)
+ return voc, ss_roidb
+
+
+def load_test_rpn_roidb(image_set, year, root_path, devkit_path):
+ voc = PascalVOC(image_set, year, root_path, devkit_path)
+ gt_roidb = voc.gt_roidb()
+ rpn_roidb = voc.rpn_roidb(gt_roidb)
+ prepare_roidb(voc, rpn_roidb)
+ return voc, rpn_roidb
diff --git a/example/rcnn/tools/load_model.py b/example/rcnn/utils/load_model.py
similarity index 97%
rename from example/rcnn/tools/load_model.py
rename to example/rcnn/utils/load_model.py
index bd5a28ea23ef..c767661232e7 100644
--- a/example/rcnn/tools/load_model.py
+++ b/example/rcnn/utils/load_model.py
@@ -47,7 +47,8 @@ def load_param(prefix, epoch, convert=False, ctx=None):
"""
arg_params, aux_params = load_checkpoint(prefix, epoch)
if convert:
- assert ctx is not None
+ if ctx is None:
+ ctx = mx.cpu()
arg_params = convert_context(arg_params, ctx)
aux_params = convert_context(aux_params, ctx)
return arg_params, aux_params
diff --git a/example/rcnn/tools/save_model.py b/example/rcnn/utils/save_model.py
similarity index 100%
rename from example/rcnn/tools/save_model.py
rename to example/rcnn/utils/save_model.py
diff --git a/example/rnn/README.md b/example/rnn/README.md
index c3b6e225add8..294e7726268e 100644
--- a/example/rnn/README.md
+++ b/example/rnn/README.md
@@ -10,11 +10,6 @@ This folder contains RNN examples using low level symbol interface.
- [gru_bucketing.py](gru_bucketing.py) PennTreeBank language model by using GRU
- [char-rnn.ipynb](char-rnn.ipynb) Notebook to demo how to train a character LSTM by using ```lstm.py```
-## R
-
-- [lstm.R](lstm.R) Functions for building a LSTM Network
-- [char_lstm.R](char_lstm.R) demo how to train a character LSTM by using ```lstm.R```
-
Performance Note:
More ```MXNET_GPU_WORKER_NTHREADS``` may lead to better performance. For setting ```MXNET_GPU_WORKER_NTHREADS```, please refer to [Environment Variables](https://mxnet.readthedocs.org/en/latest/how_to/env_var.html).
diff --git a/example/rnn/char-rnn.ipynb b/example/rnn/char-rnn.ipynb
index cefe60b96995..4ad18815be02 100644
--- a/example/rnn/char-rnn.ipynb
+++ b/example/rnn/char-rnn.ipynb
@@ -43,14 +43,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- " \n"
+ " \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- " "
+ " "
]
},
{
@@ -145,7 +145,7 @@
],
"source": [
"import os\n",
- "data_url = \"http://webdocs.cs.ualberta.ca/~bx3/lab_data.zip\"\n",
+ "data_url = \"http://data.dmlc.ml/mxnet/data/lab_data.zip\"\n",
"os.system(\"wget %s\" % data_url)\n",
"os.system(\"unzip -o lab_data.zip\")"
]
diff --git a/example/svm_mnist/README.md b/example/svm_mnist/README.md
new file mode 100644
index 000000000000..082c2053f27e
--- /dev/null
+++ b/example/svm_mnist/README.md
@@ -0,0 +1,11 @@
+# Use case with Support Vector Machine
+
+To ensure that not only the implementation is learning, but is also able to outsmart the softmax, as [this article](https://arxiv.org/pdf/1306.0239.pdf) suggests, I ran the svm_mnist.py script. It was based on the MNIST experiment description in the article and [this tutorial](https://github.com/dmlc/mxnet-gtc-tutorial/blob/master/tutorial.ipynb).
+
+
+## To run this you will need
+
+* [Numpy](http://www.scipy.org/scipylib/download.html)
+* [Sklearn](http://scikit-learn.org/stable/install.html)
+
+I recommend installing [matplotlib](http://matplotlib.org/users/installing.html) to visualize the examples
\ No newline at end of file
diff --git a/example/svm_mnist/svm_mnist.py b/example/svm_mnist/svm_mnist.py
new file mode 100644
index 000000000000..f36a0457616f
--- /dev/null
+++ b/example/svm_mnist/svm_mnist.py
@@ -0,0 +1,84 @@
+
+#############################################################
+## Please read the README.md document for better reference ##
+#############################################################
+
+import mxnet as mx
+import numpy as np
+from sklearn.datasets import fetch_mldata
+from sklearn.decomposition import PCA
+# import matplotlib.pyplot as plt
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+# Network declaration as symbols. The following pattern was based
+# on the article, but feel free to play with the number of nodes
+# and with the activation function
+data = mx.symbol.Variable('data')
+fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=512)
+act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
+fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 512)
+act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
+fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10)
+
+# Here we add the ultimate layer based on L2-SVM objective
+mlp = mx.symbol.SVMOutput(data=fc3, name='svm')
+
+# To use L1-SVM objective, comment the line above and uncomment the line below
+# mlp = mx.symbol.SVMOutput(data=fc3, name='svm', use_linear=True)
+
+# Now we fetch MNIST dataset, add some noise, as the article suggests,
+# permute and assign the examples to be used on our network
+mnist = fetch_mldata('MNIST original')
+mnist_pca = PCA(n_components=70).fit_transform(mnist.data)
+noise = np.random.normal(size=mnist_pca.shape)
+mnist_pca += noise
+np.random.seed(1234) # set seed for deterministic ordering
+p = np.random.permutation(mnist_pca.shape[0])
+X = mnist_pca[p]
+Y = mnist.target[p]
+X_show = mnist.data[p]
+
+# This is just to normalize the input to a value inside [0,1],
+# and separate train set and test set
+X = X.astype(np.float32)/255
+X_train = X[:60000]
+X_test = X[60000:]
+X_show = X_show[60000:]
+Y_train = Y[:60000]
+Y_test = Y[60000:]
+
+# Article's suggestion on batch size
+batch_size = 200
+train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size)
+test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size)
+
+# A quick work around to prevent mxnet complaining the lack of a softmax_label
+train_iter.label = mx.io._init_data(Y_train, allow_empty=True, default_name='svm_label')
+test_iter.label = mx.io._init_data(Y_test, allow_empty=True, default_name='svm_label')
+
+# Here we instantiate and fit the model for our data.
+# The article actually suggests using 400 epochs,
+# but I reduced it to 10 for convenience.
+model = mx.model.FeedForward(
+ ctx = mx.cpu(0), # Run on CPU 0
+ symbol = mlp, # Use the network we just defined
+ num_epoch = 10, # Train for 10 epochs
+ learning_rate = 0.1, # Learning rate
+ momentum = 0.9, # Momentum for SGD with momentum
+ wd = 0.00001, # Weight decay for regularization
+ )
+model.fit(
+ X=train_iter, # Training data set
+ eval_data=test_iter, # Testing data set. MXNet computes scores on test set every epoch
+ batch_end_callback = mx.callback.Speedometer(batch_size, 200)) # Logging module to print out progress
+
+# Uncomment to view an example
+# plt.imshow((X_show[0].reshape((28,28))*255).astype(np.uint8), cmap='Greys_r')
+# plt.show()
+# print 'Result:', model.predict(X_test[0:1])[0].argmax()
+
+# Now it prints how good did the network did for this configuration
+print 'Accuracy:', model.score(test_iter)*100, '%'
\ No newline at end of file
diff --git a/example/warpctc/README.md b/example/warpctc/README.md
new file mode 100644
index 000000000000..32306aa157bd
--- /dev/null
+++ b/example/warpctc/README.md
@@ -0,0 +1,91 @@
+# Baidu Warp CTC with Mxnet
+
+Baidu warp-ctc is a CTC implementation by Baidu which supports the GPU. CTC can be used with LSTM to solve label alignment problems in many areas such as OCR and speech recognition.
+
+## Install baidu warpctc
+
+```
+ cd ~/
+ git clone https://github.com/baidu-research/warp-ctc
+ cd warp-ctc
+ mkdir build
+ cd build
+ cmake ..
+ make
+ sudo make install
+```
+
+## Enable warpctc in mxnet
+
+```
+  uncomment the following lines in make/config.mk
+ WARPCTC_PATH = $(HOME)/warp-ctc
+ MXNET_PLUGINS += plugin/warpctc/warpctc.mk
+
+ rebuild mxnet by
+ make clean && make -j4
+```
+
+## Run examples
+
+I implemented two examples. The first is just a toy example which can be used to prove that the CTC integration is right. The second is an OCR example with LSTM+CTC. You can run it by:
+
+```
+  cd example/warpctc
+ python lstm_ocr.py
+```
+
+The OCR example is constructed as follows:
+
+1. I generate an 80x30 image for a 4-digit captcha using a Python captcha library
+2. The 80x30 image is used as 80 inputs for the LSTM, and every input is one column of the image (a 30-dim vector)
+3. The output layer uses CTC loss
+
+The following code shows the detailed construction of the net:
+
+```
+ def lstm_unroll(num_lstm_layer, seq_len,
+ num_hidden, num_label):
+ param_cells = []
+ last_states = []
+ for i in range(num_lstm_layer):
+ param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
+ i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
+ h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
+ h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
+ state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
+ h=mx.sym.Variable("l%d_init_h" % i))
+ last_states.append(state)
+ assert(len(last_states) == num_lstm_layer)
+ data = mx.sym.Variable('data')
+ label = mx.sym.Variable('label')
+
+ #every column of image is an input, there are seq_len inputs
+ wordvec = mx.sym.SliceChannel(data=data, num_outputs=seq_len, squeeze_axis=1)
+ hidden_all = []
+ for seqidx in range(seq_len):
+ hidden = wordvec[seqidx]
+ for i in range(num_lstm_layer):
+ next_state = lstm(num_hidden, indata=hidden,
+ prev_state=last_states[i],
+ param=param_cells[i],
+ seqidx=seqidx, layeridx=i)
+ hidden = next_state.h
+ last_states[i] = next_state
+ hidden_all.append(hidden)
+ hidden_concat = mx.sym.Concat(*hidden_all, dim=0)
+ pred = mx.sym.FullyConnected(data=hidden_concat, num_hidden=11)
+
+ # here we do NOT need to transpose label as other lstm examples do
+ label = mx.sym.Reshape(data=label, target_shape=(0,))
+ #label should be int type, so use cast
+ label = mx.sym.Cast(data = label, dtype = 'int32')
+ sm = mx.sym.WarpCTC(data=pred, label=label, label_length = num_label, input_length = seq_len)
+ return sm
+```
+
+## Support multi label length
+
+If your label length is smaller than or equal to b, you should provide labels with length b, and for those samples whose label length is smaller than b, you should append 0 to the label data to make it have length b.
+
+Here, 0 is reserved for blank label.
diff --git a/example/warpctc/lstm.py b/example/warpctc/lstm.py
new file mode 100644
index 000000000000..32ba2455e11d
--- /dev/null
+++ b/example/warpctc/lstm.py
@@ -0,0 +1,79 @@
+# pylint:skip-file
+import sys
+sys.path.insert(0, "../../python")
+import mxnet as mx
+import numpy as np
+from collections import namedtuple
+import time
+import math
+LSTMState = namedtuple("LSTMState", ["c", "h"])
+LSTMParam = namedtuple("LSTMParam", ["i2h_weight", "i2h_bias",
+ "h2h_weight", "h2h_bias"])
+LSTMModel = namedtuple("LSTMModel", ["rnn_exec", "symbol",
+ "init_states", "last_states",
+ "seq_data", "seq_labels", "seq_outputs",
+ "param_blocks"])
+
+def lstm(num_hidden, indata, prev_state, param, seqidx, layeridx):
+ """LSTM Cell symbol"""
+ i2h = mx.sym.FullyConnected(data=indata,
+ weight=param.i2h_weight,
+ bias=param.i2h_bias,
+ num_hidden=num_hidden * 4,
+ name="t%d_l%d_i2h" % (seqidx, layeridx))
+ h2h = mx.sym.FullyConnected(data=prev_state.h,
+ weight=param.h2h_weight,
+ bias=param.h2h_bias,
+ num_hidden=num_hidden * 4,
+ name="t%d_l%d_h2h" % (seqidx, layeridx))
+ gates = i2h + h2h
+ slice_gates = mx.sym.SliceChannel(gates, num_outputs=4,
+ name="t%d_l%d_slice" % (seqidx, layeridx))
+ in_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid")
+ in_transform = mx.sym.Activation(slice_gates[1], act_type="tanh")
+ forget_gate = mx.sym.Activation(slice_gates[2], act_type="sigmoid")
+ out_gate = mx.sym.Activation(slice_gates[3], act_type="sigmoid")
+ next_c = (forget_gate * prev_state.c) + (in_gate * in_transform)
+ next_h = out_gate * mx.sym.Activation(next_c, act_type="tanh")
+ return LSTMState(c=next_c, h=next_h)
+
+
+def lstm_unroll(num_lstm_layer, seq_len,
+ num_hidden, num_label):
+ param_cells = []
+ last_states = []
+ for i in range(num_lstm_layer):
+ param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
+ i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
+ h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
+ h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
+ state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
+ h=mx.sym.Variable("l%d_init_h" % i))
+ last_states.append(state)
+ assert(len(last_states) == num_lstm_layer)
+
+ # embeding layer
+ data = mx.sym.Variable('data')
+ label = mx.sym.Variable('label')
+ wordvec = mx.sym.SliceChannel(data=data, num_outputs=seq_len, squeeze_axis=1)
+
+ hidden_all = []
+ for seqidx in range(seq_len):
+ hidden = wordvec[seqidx]
+ for i in range(num_lstm_layer):
+ next_state = lstm(num_hidden, indata=hidden,
+ prev_state=last_states[i],
+ param=param_cells[i],
+ seqidx=seqidx, layeridx=i)
+ hidden = next_state.h
+ last_states[i] = next_state
+ hidden_all.append(hidden)
+
+ hidden_concat = mx.sym.Concat(*hidden_all, dim=0)
+ pred = mx.sym.FullyConnected(data=hidden_concat, num_hidden=11)
+
+ label = mx.sym.Reshape(data=label, shape=(-1,))
+ label = mx.sym.Cast(data = label, dtype = 'int32')
+ sm = mx.sym.WarpCTC(data=pred, label=label, label_length = num_label, input_length = seq_len)
+ return sm
+
diff --git a/example/warpctc/lstm_ocr.py b/example/warpctc/lstm_ocr.py
new file mode 100644
index 000000000000..048572500b85
--- /dev/null
+++ b/example/warpctc/lstm_ocr.py
@@ -0,0 +1,176 @@
+# pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
+# pylint: disable=superfluous-parens, no-member, invalid-name
+import sys, random
+sys.path.insert(0, "../../python")
+import numpy as np
+import mxnet as mx
+
+from lstm import lstm_unroll
+
+from io import BytesIO
+from captcha.image import ImageCaptcha
+import cv2, random
+
+class SimpleBatch(object):
+ def __init__(self, data_names, data, label_names, label):
+ self.data = data
+ self.label = label
+ self.data_names = data_names
+ self.label_names = label_names
+
+ self.pad = 0
+ self.index = None # TODO: what is index?
+
+ @property
+ def provide_data(self):
+ return [(n, x.shape) for n, x in zip(self.data_names, self.data)]
+
+ @property
+ def provide_label(self):
+ return [(n, x.shape) for n, x in zip(self.label_names, self.label)]
+
+def gen_rand():
+ buf = ""
+ max_len = random.randint(3,4)
+ for i in range(max_len):
+ buf += str(random.randint(0,9))
+ return buf
+
+def get_label(buf):
+ ret = np.zeros(4)
+ for i in range(len(buf)):
+ ret[i] = 1 + int(buf[i])
+ if len(buf) == 3:
+ ret[3] = 0
+ return ret
+
+class OCRIter(mx.io.DataIter):
+ def __init__(self, count, batch_size, num_label, init_states):
+ super(OCRIter, self).__init__()
+ self.captcha = ImageCaptcha(fonts=['./data/Xerox.ttf'])
+ self.batch_size = batch_size
+ self.count = count
+ self.num_label = num_label
+ self.init_states = init_states
+ self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
+ self.provide_data = [('data', (batch_size, 2400))] + init_states
+ self.provide_label = [('label', (self.batch_size, 4))]
+
+ def __iter__(self):
+ print 'iter'
+ init_state_names = [x[0] for x in self.init_states]
+ for k in range(self.count):
+ data = []
+ label = []
+ for i in range(self.batch_size):
+ num = gen_rand()
+ img = self.captcha.generate(num)
+ img = np.fromstring(img.getvalue(), dtype='uint8')
+ img = cv2.imdecode(img, cv2.IMREAD_GRAYSCALE)
+ img = cv2.resize(img, (80, 30))
+ img = img.transpose(1, 0)
+ img = img.reshape((80 * 30))
+ img = np.multiply(img, 1/255.0)
+ data.append(img)
+ label.append(get_label(num))
+
+ data_all = [mx.nd.array(data)] + self.init_state_arrays
+ label_all = [mx.nd.array(label)]
+ data_names = ['data'] + init_state_names
+ label_names = ['label']
+
+
+ data_batch = SimpleBatch(data_names, data_all, label_names, label_all)
+ yield data_batch
+
+ def reset(self):
+ pass
+
+BATCH_SIZE = 32
+SEQ_LENGTH = 80
+
+def ctc_label(p):
+ ret = []
+ p1 = [0] + p
+ for i in range(len(p)):
+ c1 = p1[i]
+ c2 = p1[i+1]
+ if c2 == 0 or c2 == c1:
+ continue
+ ret.append(c2)
+ return ret
+
+def remove_blank(l):
+ ret = []
+ for i in range(len(l)):
+ if l[i] == 0:
+ break
+ ret.append(l[i])
+ return ret
+
+def Accuracy(label, pred):
+ global BATCH_SIZE
+ global SEQ_LENGTH
+ hit = 0.
+ total = 0.
+ for i in range(BATCH_SIZE):
+ l = remove_blank(label[i])
+ p = []
+ for k in range(SEQ_LENGTH):
+ p.append(np.argmax(pred[k * BATCH_SIZE + i]))
+ p = ctc_label(p)
+ if len(p) == len(l):
+ match = True
+ for k in range(len(p)):
+ if p[k] != int(l[k]):
+ match = False
+ break
+ if match:
+ hit += 1.0
+ total += 1.0
+ return hit / total
+
+if __name__ == '__main__':
+ num_hidden = 100
+ num_lstm_layer = 2
+
+ num_epoch = 10
+ learning_rate = 0.001
+ momentum = 0.9
+ num_label = 4
+
+ contexts = [mx.context.gpu(1)]
+
+ def sym_gen(seq_len):
+ return lstm_unroll(num_lstm_layer, seq_len,
+ num_hidden=num_hidden,
+ num_label = num_label)
+
+ init_c = [('l%d_init_c'%l, (BATCH_SIZE, num_hidden)) for l in range(num_lstm_layer)]
+ init_h = [('l%d_init_h'%l, (BATCH_SIZE, num_hidden)) for l in range(num_lstm_layer)]
+ init_states = init_c + init_h
+
+ data_train = OCRIter(10000, BATCH_SIZE, num_label, init_states)
+ data_val = OCRIter(1000, BATCH_SIZE, num_label, init_states)
+
+ symbol = sym_gen(SEQ_LENGTH)
+
+ model = mx.model.FeedForward(ctx=contexts,
+ symbol=symbol,
+ num_epoch=num_epoch,
+ learning_rate=learning_rate,
+ momentum=momentum,
+ wd=0.00001,
+ initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
+
+ import logging
+ head = '%(asctime)-15s %(message)s'
+ logging.basicConfig(level=logging.DEBUG, format=head)
+
+ print 'begin fit'
+
+ model.fit(X=data_train, eval_data=data_val,
+ eval_metric = mx.metric.np(Accuracy),
+ batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 50),)
+
+ model.save("ocr")
diff --git a/example/warpctc/toy_ctc.py b/example/warpctc/toy_ctc.py
new file mode 100644
index 000000000000..2caa11e68399
--- /dev/null
+++ b/example/warpctc/toy_ctc.py
@@ -0,0 +1,163 @@
+# pylint: disable=C0111,too-many-arguments,too-many-instance-attributes,too-many-locals,redefined-outer-name,fixme
+# pylint: disable=superfluous-parens, no-member, invalid-name
+import sys
+sys.path.insert(0, "../../python")
+import numpy as np
+import mxnet as mx
+import random
+from lstm import lstm_unroll
+
+class SimpleBatch(object):
+ def __init__(self, data_names, data, label_names, label):
+ self.data = data
+ self.label = label
+ self.data_names = data_names
+ self.label_names = label_names
+
+ self.pad = 0
+ self.index = None # TODO: what is index?
+
+ @property
+ def provide_data(self):
+ return [(n, x.shape) for n, x in zip(self.data_names, self.data)]
+
+ @property
+ def provide_label(self):
+ return [(n, x.shape) for n, x in zip(self.label_names, self.label)]
+
+def gen_feature(n):
+ ret = np.zeros(10)
+ ret[n] = 1
+ return ret
+
+def gen_rand():
+ num = random.randint(0, 9999)
+ buf = str(num)
+ while len(buf) < 4:
+ buf = "0" + buf
+ ret = np.array([])
+ for i in range(80):
+ c = int(buf[i / 20])
+ ret = np.concatenate([ret, gen_feature(c)])
+ return buf, ret
+
+def get_label(buf):
+ ret = np.zeros(4)
+ for i in range(4):
+ ret[i] = 1 + int(buf[i])
+ return ret
+
+class DataIter(mx.io.DataIter):
+ def __init__(self, count, batch_size, num_label, init_states):
+ super(DataIter, self).__init__()
+ self.batch_size = batch_size
+ self.count = count
+ self.num_label = num_label
+ self.init_states = init_states
+ self.init_state_arrays = [mx.nd.zeros(x[1]) for x in init_states]
+ self.provide_data = [('data', (batch_size, 10 * 80))] + init_states
+ self.provide_label = [('label', (self.batch_size, 4))]
+
+ def __iter__(self):
+ init_state_names = [x[0] for x in self.init_states]
+ for k in range(self.count):
+ data = []
+ label = []
+ for i in range(self.batch_size):
+ num, img = gen_rand()
+ data.append(img)
+ label.append(get_label(num))
+
+ data_all = [mx.nd.array(data)] + self.init_state_arrays
+ label_all = [mx.nd.array(label)]
+ data_names = ['data'] + init_state_names
+ label_names = ['label']
+
+
+ data_batch = SimpleBatch(data_names, data_all, label_names, label_all)
+ yield data_batch
+
+ def reset(self):
+ pass
+
+BATCH_SIZE = 32
+SEQ_LENGTH = 80
+
+def ctc_label(p):
+ ret = []
+ p1 = [0] + p
+ for i in range(len(p)):
+ c1 = p1[i]
+ c2 = p1[i+1]
+ if c2 == 0 or c2 == c1:
+ continue
+ ret.append(c2)
+ return ret
+
+
+def Accuracy(label, pred):
+ global BATCH_SIZE
+ global SEQ_LENGTH
+ hit = 0.
+ total = 0.
+ for i in range(BATCH_SIZE):
+ l = label[i]
+ p = []
+ for k in range(SEQ_LENGTH):
+ p.append(np.argmax(pred[k * BATCH_SIZE + i]))
+ p = ctc_label(p)
+ if len(p) == len(l):
+ match = True
+ for k in range(len(p)):
+ if p[k] != int(l[k]):
+ match = False
+ break
+ if match:
+ hit += 1.0
+ total += 1.0
+ return hit / total
+
+if __name__ == '__main__':
+ num_hidden = 100
+ num_lstm_layer = 1
+
+ num_epoch = 10
+ learning_rate = 0.001
+ momentum = 0.9
+ num_label = 4
+
+ contexts = [mx.context.gpu(0)]
+
+ def sym_gen(seq_len):
+ return lstm_unroll(num_lstm_layer, seq_len,
+ num_hidden=num_hidden,
+ num_label = num_label)
+
+ init_c = [('l%d_init_c'%l, (BATCH_SIZE, num_hidden)) for l in range(num_lstm_layer)]
+ init_h = [('l%d_init_h'%l, (BATCH_SIZE, num_hidden)) for l in range(num_lstm_layer)]
+ init_states = init_c + init_h
+
+ data_train = DataIter(100000, BATCH_SIZE, num_label, init_states)
+ data_val = DataIter(1000, BATCH_SIZE, num_label, init_states)
+
+ symbol = sym_gen(SEQ_LENGTH)
+
+ model = mx.model.FeedForward(ctx=contexts,
+ symbol=symbol,
+ num_epoch=num_epoch,
+ learning_rate=learning_rate,
+ momentum=momentum,
+ wd=0.00001,
+ initializer=mx.init.Xavier(factor_type="in", magnitude=2.34))
+
+ import logging
+ head = '%(asctime)-15s %(message)s'
+ logging.basicConfig(level=logging.DEBUG, format=head)
+
+ print 'begin fit'
+
+ model.fit(X=data_train, eval_data=data_val,
+ eval_metric = mx.metric.np(Accuracy),
+ batch_end_callback=mx.callback.Speedometer(BATCH_SIZE, 50),)
+
+ model.save("ocr")
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 097e3eb603bd..28bc89406c0b 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -479,6 +479,14 @@ MXNET_DLL int MXFuncInvokeEx(FunctionHandle fun,
*/
MXNET_DLL int MXSymbolListAtomicSymbolCreators(mx_uint *out_size,
AtomicSymbolCreator **out_array);
+
+/*!
+ * \brief Get the name of an atomic symbol.
+ * \param creator the AtomicSymbolCreator.
+ * \param name The returned name of the creator.
+ */
+MXNET_DLL int MXSymbolGetAtomicSymbolName(AtomicSymbolCreator creator,
+ const char **name);
/*!
* \brief Get the detailed information about atomic symbol.
* \param creator the AtomicSymbolCreator.
diff --git a/include/mxnet/mxrtc.h b/include/mxnet/mxrtc.h
index a45badb1d3dc..9de59f63da2a 100644
--- a/include/mxnet/mxrtc.h
+++ b/include/mxnet/mxrtc.h
@@ -60,7 +60,7 @@ class MXRtc {
unsigned int block_dim_Z);
private:
- static const std::string str_type;
+ static const char str_type[];
static std::unordered_map kernel_registry;
std::string name_;
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 3649b36e0e05..e4f15082b398 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -65,11 +65,12 @@ class NDArray {
* \return the data TBlob
*/
inline TBlob data() const {
+ TBlob res;
MSHADOW_TYPE_SWITCH(dtype_, DType, {
- return TBlob(static_cast(ptr_->shandle.dptr)
+ res = TBlob(static_cast(ptr_->shandle.dptr)
+ offset_, shape_, ptr_->shandle.ctx.dev_mask());
});
- return TBlob();
+ return res;
}
/*!
* \return the context of NDArray, this function is only valid when the NDArray is not empty
diff --git a/include/mxnet/operator_util.h b/include/mxnet/operator_util.h
index f96b85108b47..71276a4bec5f 100644
--- a/include/mxnet/operator_util.h
+++ b/include/mxnet/operator_util.h
@@ -11,6 +11,10 @@
#ifndef MXNET_OPERATOR_UTIL_H_
#define MXNET_OPERATOR_UTIL_H_
+#ifdef _MSC_VER
+#pragma warning(disable:4503) // disable warning: decorated name length exceeded.
+#endif
+
#include
#include
#include
@@ -56,6 +60,26 @@ struct EnvArguments {
std::vector resource;
};
+/*!
+ * \brief source function that generates output based on env
+ * The result container is pre-allocated with the correct shape.
+ * \param env The Environment arguments.
+ * \param ret The container to store the return value.
+ * \param req The requirement to store the ret.
+ * \param ctx Runtime context to execute the function.
+ */
+typedef void (*SourceFunction)(const EnvArguments& env,
+ TBlob* ret,
+ OpReqType req,
+ RunContext ctx);
+
+/*!
+ * \brief Shape inference function to get the correct shape.
+ * \param env The Environment arguments.
+ * \return The inferred result shape.
+ */
+typedef TShape (*SourceShapeFunction)(const EnvArguments& env);
+
/*!
* \brief Unary function that takes a src and save result to ret.
* The result container is pre-allocated with the correct shape.
@@ -261,6 +285,11 @@ class SimpleOpRegEntry {
* \param req the request.
*/
virtual TSelf& set_resource_request(ResourceRequest req) = 0;
+ /*!
+ * \brief set source inference function.
+   * \param fshapeinfer The source function that performs the operation.
+ */
+ virtual TSelf& set_shape_function(SourceShapeFunction fshapeinfer) = 0;
/*!
* \brief set shape inference function.
* Default: out_shape = in_shape
@@ -273,6 +302,16 @@ class SimpleOpRegEntry {
* \param fshapeinfer The binary function that peforms the operation.
*/
virtual TSelf& set_shape_function(BinaryShapeFunction fshapeinfer) = 0;
+ /*!
+ * \brief set function of the function to be fsource
+ * \param dev_mask The device mask of the function can act on.
+   * \param fsource The source function that performs the operation.
+ * \param register_symbolic Whether register a symbolic operator as well.
+ */
+ virtual TSelf& set_function(
+ int dev_mask,
+ SourceFunction fsource,
+ SimpleOpRegOption register_symbolic = kRegisterSymbolic) = 0;
/*!
* \brief set function of the function to be funary
* \param dev_mask The device mask of the function can act on.
@@ -412,47 +451,9 @@ class SimpleOpRegistry {
}
/*!
-* \brief cast dynamic range variable into static variable
-* \param var the source value, constrained to be between 1 and 5
-* \param NDIM the const NDIM that can be used in the template
+* \brief Maximum ndim supported for special operators like broadcasting with non contiguous lhs/rhs
*/
-#define MXNET_RANGE_SWITCH(var, NDIM, ...) \
- { \
- switch (var) { \
- case 1: \
- { \
- static const int NDIM = 1; \
- {__VA_ARGS__} \
- } \
- break; \
- case 2: \
- { \
- static const int NDIM = 2; \
- {__VA_ARGS__} \
- } \
- break; \
- case 3: \
- { \
- static const int NDIM = 3; \
- {__VA_ARGS__} \
- } \
- break; \
- case 4: \
- { \
- static const int NDIM = 4; \
- {__VA_ARGS__} \
- } \
- break; \
- case 5: \
- { \
- static const int NDIM = 5; \
- {__VA_ARGS__} \
- } \
- break; \
- default: \
- LOG(FATAL) << "Only support ndim=1 to 5."; \
- } \
- }
+#define MXNET_SPECIAL_MAX_NDIM 7
//--------------------------------------------------------------
diff --git a/include/mxnet/resource.h b/include/mxnet/resource.h
index 31c380dd8503..da41cb07e52d 100644
--- a/include/mxnet/resource.h
+++ b/include/mxnet/resource.h
@@ -74,6 +74,10 @@ struct Resource {
* \brief Get space requested as mshadow Tensor.
* The caller can request arbitrary size.
*
+ * This space can be shared with other calls to this->get_space.
+   * So the caller needs to serialize the calls when using the conflicting space.
+ * The temp space will remain valid until release is called.
+ *
* \param shape the Shape of returning tensor.
* \param stream the stream of retruning tensor.
* \return the mshadow tensor requested.
@@ -132,6 +136,16 @@ struct Resource {
reinterpret_cast(get_host_space_internal(shape.Size() * sizeof(DType))),
shape, shape[ndim - 1], NULL);
}
+ /*!
+   * \brief Release all the existing allocated space.
+   * The existing allocated address will remain valid
+   * until release is called.
+ *
+   * Even if the user does not call release, the space occupation
+   * of the resource will remain at most two times of the maximum
+   * requested space.
+ */
+ void release() const;
/*!
* \brief internal function to get space from resources.
* \param size The size of the space.
diff --git a/make/config.mk b/make/config.mk
index bbd19e56b5d7..aa3986a21673 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -116,6 +116,9 @@ EXTRA_OPERATORS =
# TORCH_PATH = $(HOME)/torch
# MXNET_PLUGINS += plugin/torch/torch.mk
+# WARPCTC_PATH = $(HOME)/warp-ctc
+# MXNET_PLUGINS += plugin/warpctc/warpctc.mk
+
# whether to use sframe integration. This requires build sframe
# git@github.com:dato-code/SFrame.git
# SFRAME_PATH = $(HOME)/SFrame
diff --git a/mshadow b/mshadow
index 310e015e5c65..44d61f8ef9d8 160000
--- a/mshadow
+++ b/mshadow
@@ -1 +1 @@
-Subproject commit 310e015e5c65bd5314e57fc79ceb06b162547325
+Subproject commit 44d61f8ef9d86e85e7bc62b2a1d4dc40554672f1
diff --git a/plugin/opencv/__init__.py b/plugin/opencv/__init__.py
new file mode 100644
index 000000000000..072575177e41
--- /dev/null
+++ b/plugin/opencv/__init__.py
@@ -0,0 +1,6 @@
+# coding: utf-8
+# pylint: disable=wildcard-import
+
+"""Opencv plugin for mxnet"""
+from .opencv import *
+
diff --git a/plugin/opencv/cv_api.cc b/plugin/opencv/cv_api.cc
new file mode 100644
index 000000000000..78bec01548d4
--- /dev/null
+++ b/plugin/opencv/cv_api.cc
@@ -0,0 +1,149 @@
+/*!
+ * Copyright (c) 2016 by Contributors
+ * \file cv_api.h
+ * \brief C API for opencv
+ * \author Junyuan Xie
+ */
+#include
+#include
+#include
+#include
+#include "cv_api.h"
+#include "../../src/c_api/c_api_error.h"
+
+
+using namespace mxnet;
+// http://www.64lines.com/jpeg-width-height
+// Gets the JPEG size from the array of data passed to the function, file reference: http://www.obrador.com/essentialjpeg/headerinfo.htm
+bool get_jpeg_size(const unsigned char* data, mx_uint data_size, mx_uint *width, mx_uint *height) {
+ // Check for valid JPEG image
+ mx_uint i = 0; // Keeps track of the position within the file
+ if (data[i] == 0xFF && data[i+1] == 0xD8 && data[i+2] == 0xFF && data[i+3] == 0xE0) {
+ i += 4;
+ // Check for valid JPEG header (null terminated JFIF)
+ if (data[i+2] == 'J' && data[i+3] == 'F' && data[i+4] == 'I'
+ && data[i+5] == 'F' && data[i+6] == 0x00) {
+ // Retrieve the block length of the first block since
+ // the first block will not contain the size of file
+ uint16_t block_length = data[i] * 256 + data[i+1];
+ while (i < data_size) {
+ i+=block_length; // Increase the file index to get to the next block
+ if (i >= data_size) return false; // Check to protect against segmentation faults
+ if (data[i] != 0xFF) return false; // Check that we are truly at the start of another block
+ if (data[i+1] == 0xC0) {
+ // 0xFFC0 is the "Start of frame" marker which contains the file size
+ // The structure of the 0xFFC0 block is quite simple
+ // [0xFFC0][ushort length][uchar precision][ushort x][ushort y]
+ *height = data[i+5]*256 + data[i+6];
+ *width = data[i+7]*256 + data[i+8];
+ return true;
+ } else {
+ i+=2; // Skip the block marker
+ block_length = data[i] * 256 + data[i+1]; // Go to the next block
+ }
+ }
+ return false; // If this point is reached then no size was found
+ } else {
+ return false; // Not a valid JFIF string
+ }
+ } else {
+ return false; // Not a valid SOI header
+ }
+}
+
+bool get_png_size(const unsigned char* data, mx_uint data_size, mx_uint *width, mx_uint *height) {
+ if (data[0] == 0x89 && data[1] == 0x50 && data[2] ==0x4E && data[3] == 0x47) {
+ unsigned char const* p = data + 16;
+ *width = ((p[0]*256 + p[1])*256 + p[2])*256 + p[3];
+ p += 4;
+ *height = ((p[0]*256 + p[1])*256 + p[2])*256 + p[3];
+ return true;
+ } else {
+ return false;
+ }
+}
+
+MXNET_DLL int MXCVImdecode(const unsigned char *img, const mx_uint len,
+ const int flag, NDArrayHandle *out) {
+ API_BEGIN();
+ mx_uint dims[3];
+ CHECK_GE(flag, 0) << "flag must be 0 (grayscale) or 1 (colored).";
+ dims[2] = flag == 0 ? 1 : 3;
+ if (get_jpeg_size(img, len, dims+1, dims)) {
+ } else if (get_png_size(img, len, dims+1, dims)) {
+ } else {
+ LOG(FATAL) << "Only supports png and jpg.";
+ }
+ NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8);
+ unsigned char *img_cpy = new unsigned char[len];
+ memcpy(img_cpy, img, sizeof(unsigned char)*len);
+ Engine::Get()->PushSync([=](RunContext ctx){
+ ndout.CheckAndAlloc();
+ cv::Mat buf(1, len, CV_8U, img_cpy);
+ cv::Mat dst(dims[0], dims[1], flag == 0 ? CV_8U : CV_8UC3, ndout.data().dptr_);
+ cv::imdecode(buf, flag, &dst);
+ CHECK(!dst.empty());
+ delete[] img_cpy;
+ }, ndout.ctx(), {}, {ndout.var()});
+ NDArray *tmp = new NDArray();
+ *tmp = ndout;
+ *out = tmp;
+ API_END();
+}
+
+
+MXNET_DLL int MXCVResize(NDArrayHandle src, const mx_uint w, const mx_uint h,
+ const int interpolation, NDArrayHandle *out) {
+ API_BEGIN();
+ NDArray ndsrc = *static_cast(src);
+ CHECK_EQ(ndsrc.shape().ndim(), 3);
+ CHECK_EQ(ndsrc.ctx(), Context::CPU());
+ CHECK_EQ(ndsrc.dtype(), mshadow::kUint8);
+
+ mx_uint dims[3] = {h, w, ndsrc.shape()[2]};
+ NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8);
+
+ Engine::Get()->PushSync([=](RunContext ctx){
+ ndout.CheckAndAlloc();
+ cv::Mat buf(ndsrc.shape()[0], ndsrc.shape()[1],
+ dims[2] == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_);
+ cv::Mat dst(h, w, dims[2] == 3 ? CV_8UC3 : CV_8U, ndout.data().dptr_);
+ cv::resize(buf, dst, cv::Size(w, h), 0, 0, interpolation);
+ CHECK(!dst.empty());
+ }, ndout.ctx(), {ndsrc.var()}, {ndout.var()});
+ NDArray *tmp = new NDArray();
+ *tmp = ndout;
+ *out = tmp;
+ API_END();
+}
+
+MXNET_DLL int MXCVcopyMakeBorder(NDArrayHandle src,
+ const int top,
+ const int bot,
+ const int left,
+ const int right,
+ const int type,
+ const double value,
+ NDArrayHandle *out) {
+ API_BEGIN();
+ NDArray ndsrc = *static_cast(src);
+ CHECK_EQ(ndsrc.shape().ndim(), 3);
+ CHECK_EQ(ndsrc.ctx(), Context::CPU());
+ CHECK_EQ(ndsrc.dtype(), mshadow::kUint8);
+
+ int h = ndsrc.shape()[0], w = ndsrc.shape()[1], c = ndsrc.shape()[2];
+ mx_uint dims[3] = {top+h+bot, left+w+right, c};
+ NDArray ndout(TShape(dims, dims+3), Context::CPU(), true, mshadow::kUint8);
+
+ Engine::Get()->PushSync([=](RunContext ctx){
+ ndout.CheckAndAlloc();
+ cv::Mat buf(h, w, c == 3 ? CV_8UC3 : CV_8U, ndsrc.data().dptr_);
+ cv::Mat dst(top+h+bot, left+w+right, c == 3 ? CV_8UC3 : CV_8U, ndout.data().dptr_);
+ cv::copyMakeBorder(buf, dst, top, bot, left, right, type, cv::Scalar(value));
+ CHECK(!dst.empty());
+ }, ndout.ctx(), {ndsrc.var()}, {ndout.var()});
+ NDArray *tmp = new NDArray();
+ *tmp = ndout;
+ *out = tmp;
+ API_END();
+}
diff --git a/plugin/opencv/cv_api.h b/plugin/opencv/cv_api.h
new file mode 100644
index 000000000000..fc224d0e1d05
--- /dev/null
+++ b/plugin/opencv/cv_api.h
@@ -0,0 +1,35 @@
+/*!
+ * Copyright (c) 2016 by Contributors
+ * \file cv_api.h
+ * \brief C API for opencv
+ * \author Junyuan Xie
+ */
+#ifndef PLUGIN_OPENCV_CV_API_H_
+#define PLUGIN_OPENCV_CV_API_H_
+
+#include
+
+MXNET_DLL int MXCVImdecode(
+ const unsigned char *img,
+ const mx_uint len,
+ const int flag,
+ NDArrayHandle *out);
+
+MXNET_DLL int MXCVResize(
+ NDArrayHandle src,
+ const mx_uint w,
+ const mx_uint h,
+ const int interpolation,
+ NDArrayHandle *out);
+
+MXNET_DLL int MXCVcopyMakeBorder(
+ NDArrayHandle src,
+ const int top,
+ const int bot,
+ const int left,
+ const int right,
+ const int type,
+ const double value,
+ NDArrayHandle *out);
+
+#endif // PLUGIN_OPENCV_CV_API_H_
diff --git a/plugin/opencv/opencv.mk b/plugin/opencv/opencv.mk
new file mode 100644
index 000000000000..ab1f6ff2ee03
--- /dev/null
+++ b/plugin/opencv/opencv.mk
@@ -0,0 +1,4 @@
+OPENCV_SRC = $(wildcard plugin/opencv/*.cc)
+PLUGIN_OBJ += $(patsubst %.cc, build/%.o, $(OPENCV_SRC))
+OPENCV_CUSRC = $(wildcard plugin/opencv/*.cu)
+PLUGIN_CUOBJ += $(patsubst %.cu, build/%_gpu.o, $(OPENCV_CUSRC))
diff --git a/plugin/opencv/opencv.py b/plugin/opencv/opencv.py
new file mode 100644
index 000000000000..6ee5be13f643
--- /dev/null
+++ b/plugin/opencv/opencv.py
@@ -0,0 +1,173 @@
+# coding: utf-8
+# pylint: disable=too-many-arguments,no-member,invalid-name
+
+"""Opencv plugin for mxnet"""
+import random
+import ctypes
+import cv2
+import mxnet as mx
+from mxnet.base import _LIB
+from mxnet.base import mx_uint, NDArrayHandle, check_call
+
+def imdecode(str_img, flag=1):
+ """Decode image from str buffer.
+ Wrapper for cv2.imdecode that uses mx.nd.NDArray
+
+ Parameters
+ ----------
+ str_img : str
+ str buffer read from image file
+ flag : int
+ same as flag for cv2.imdecode
+ Returns
+ -------
+ img : NDArray
+ decoded image in (height, width, channels)
+ with BGR color channel order
+ """
+ hdl = NDArrayHandle()
+ check_call(_LIB.MXCVImdecode(ctypes.c_char_p(str_img),
+ mx_uint(len(str_img)),
+ flag, ctypes.byref(hdl)))
+ return mx.nd.NDArray(hdl)
+
+def resize(src, size, interpolation=cv2.INTER_LINEAR):
+ """Resize image to the given size.
+ Wrapper for cv2.resize that uses mx.nd.NDArray
+
+ Parameters
+ ----------
+ src : NDArray
+ image in (height, width, channels)
+ size : tuple
+ target size in (width, height)
+ interpolation : int
+ same as interpolation for cv2.resize
+
+ Returns
+ -------
+ img : NDArray
+ resized image
+ """
+ hdl = NDArrayHandle()
+ check_call(_LIB.MXCVResize(src.handle, mx_uint(size[0]), mx_uint(size[1]),
+ interpolation, ctypes.byref(hdl)))
+ return mx.nd.NDArray(hdl)
+
+def copyMakeBorder(src, top, bot, left, right, border_type=cv2.BORDER_CONSTANT, value=0):
+ """Pad image border
+ Wrapper for cv2.copyMakeBorder that uses mx.nd.NDArray
+
+ Parameters
+ ----------
+ src : NDArray
+ Image in (height, width, channels).
+ Others are the same with cv2.copyMakeBorder
+
+ Returns
+ -------
+ img : NDArray
+ padded image
+ """
+ hdl = NDArrayHandle()
+ check_call(_LIB.MXCVcopyMakeBorder(src.handle, ctypes.c_int(top), ctypes.c_int(bot),
+ ctypes.c_int(left), ctypes.c_int(right),
+ ctypes.c_int(border_type), ctypes.c_double(value),
+ ctypes.byref(hdl)))
+ return mx.nd.NDArray(hdl)
+
+
+def scale_down(src_size, size):
+ """Scale down crop size if it's bigger than image size"""
+ w, h = size
+ sw, sh = src_size
+ if sh < h:
+ w, h = float(w*sh)/h, sh
+ if sw < w:
+ w, h = sw, float(h*sw)/w
+ return int(w), int(h)
+
+def fixed_crop(src, x0, y0, w, h, size=None, interpolation=cv2.INTER_CUBIC):
+ """Crop src at fixed location, and (optionally) resize it to size"""
+ out = mx.nd.crop(src, begin=(y0, x0, 0), end=(y0+h, x0+w, int(src.shape[2])))
+ if size is not None and (w, h) != size:
+ out = resize(out, size, interpolation=interpolation)
+ return out
+
+def random_crop(src, size):
+ """Randomly crop src with size. Upsample result if src is smaller than size"""
+ h, w, _ = src.shape
+ new_w, new_h = scale_down((w, h), size)
+
+ x0 = random.randint(0, w - new_w)
+ y0 = random.randint(0, h - new_h)
+
+ out = fixed_crop(src, x0, y0, new_w, new_h, size)
+ return out, (x0, y0, new_w, new_h)
+
+def color_normalize(src, mean, std):
+ """Normalize src with mean and std"""
+ src -= mean
+ src /= std
+ return src
+
+def random_size_crop(src, size, min_area=0.25, ratio=(3.0/4.0, 4.0/3.0)):
+ """Randomly crop src with size. Randomize area and aspect ratio"""
+ h, w, _ = src.shape
+ area = w*h
+ for _ in range(10):
+ new_area = random.uniform(min_area, 1.0) * area
+ new_ratio = random.uniform(*ratio)
+ new_w = int(new_area*new_ratio)
+ new_h = int(new_area/new_ratio)
+
+ if random.uniform(0., 1.) < 0.5:
+ new_w, new_h = new_h, new_w
+
+ if new_w > w or new_h > h:
+ continue
+
+ x0 = random.randint(0, w - new_w)
+ y0 = random.randint(0, h - new_h)
+
+ out = fixed_crop(src, x0, y0, new_w, new_h, size)
+ return out, (x0, y0, new_w, new_h)
+
+ return random_crop(src, size)
+
+class ImageListIter(mx.io.DataIter):
+ """An example image iterator using opencv plugin"""
+ def __init__(self, root, flist, batch_size, size, mean=None):
+ super(ImageListIter, self).__init__()
+ self.root = root
+ self.list = [line.strip() for line in open(flist).readlines()]
+ self.cur = 0
+ self.batch_size = batch_size
+ self.size = size
+ if mean is not None:
+ self.mean = mx.nd.array(mean)
+ else:
+ self.mean = None
+
+ def reset(self):
+ self.cur = 0
+
+ def next(self):
+ batch = mx.nd.zeros((self.batch_size, self.size[1], self.size[0], 3))
+ i = self.cur
+ for i in range(self.cur, min(len(self.list), self.cur+self.batch_size)):
+ str_img = open(self.root+self.list[i]+'.jpg').read()
+ img = imdecode(str_img, 1)
+ img, _ = random_crop(img, self.size)
+ batch[i - self.cur] = img
+ batch = mx.nd.transpose(batch, axes=(0, 3, 1, 2))
+ ret = mx.io.DataBatch(data=[batch],
+ label=[],
+ pad=self.batch_size-(i-self.cur),
+ index=None)
+ self.cur = i
+ return ret
+
+
+
+
diff --git a/plugin/warpctc/warpctc-inl.h b/plugin/warpctc/warpctc-inl.h
new file mode 100644
index 000000000000..b37132144cb9
--- /dev/null
+++ b/plugin/warpctc/warpctc-inl.h
@@ -0,0 +1,295 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file warpctc-inl.h
+ * \brief warpctc operator
+ * \author Liang Xiang
+*/
+#ifndef PLUGIN_WARPCTC_WARPCTC_INL_H_
+#define PLUGIN_WARPCTC_WARPCTC_INL_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../../src/operator/operator_common.h"
+
+namespace mxnet {
+namespace op {
+
+namespace warpctc_enum {
+ enum CTCOpInputs {kData, kLabel};
+ enum CTCOpOutputs {kOut};
+} // namespace warpctc_enum
+
+struct WarpCTCParam : public dmlc::Parameter {
+ int label_length;
+ int input_length;
+ DMLC_DECLARE_PARAMETER(WarpCTCParam) {
+ DMLC_DECLARE_FIELD(label_length)
+ .set_default(0)
+ .describe("Real label length");
+ DMLC_DECLARE_FIELD(input_length)
+ .set_default(0)
+ .describe("Input length");
+ }
+};
+
+template
+class WarpCTCOp : public Operator {
+ private:
+ WarpCTCParam param_;
+
+ public:
+ explicit WarpCTCOp(WarpCTCParam p) {
+ this->param_ = p;
+ }
+
+ ~WarpCTCOp() {
+ }
+
+ inline void throw_on_error(ctcStatus_t status, const char* message) {
+ if (status != CTC_STATUS_SUCCESS) {
+ throw std::runtime_error(message
+ + (", stat = "
+ + std::string(ctcGetStatusString(status))));
+ }
+ }
+
+ virtual void Forward(const OpContext &ctx,
+ const std::vector &in_data,
+ const std::vector &req,
+ const std::vector &out_data,
+ const std::vector &aux_args) {
+ using namespace mshadow;
+ using namespace mshadow::expr;
+ CHECK_EQ(in_data.size(), 2) << "CTCOutput Input: [data, label]";
+ CHECK_EQ(out_data.size(), 1) << "CTCOutput Output: [output]";
+
+ Stream *s = ctx.get_stream();
+ TBlob data = in_data[warpctc_enum::kData];
+ TBlob out = out_data[warpctc_enum::kOut];
+ Tensor data_tensor = data.FlatTo2D(s);
+ Tensor out_tensor = out.FlatTo2D(s);
+ Softmax(out_tensor, data_tensor);
+ }
+
+ std::vector labelLengths(const int * flat_labels, int minibatch,
+ int size, int blank, int * total_length) {
+ CHECK_EQ(param_.label_length * minibatch, size)
+ << "label size should = label_length * minibatch";
+ std::vector ret(minibatch, 0);
+ for (int i = 0; i < size; i++) {
+ if (flat_labels[i] == blank) {
+ continue;
+ }
+ int b = i / param_.label_length;
+ ret[b]++;
+ (*total_length)++;
+ }
+ return ret;
+ }
+
+ void removeBlank(const int * flat_labels, int * cpu_labels,
+ int size, int blank) {
+ int k = 0;
+ for (int i = 0; i < size; i++) {
+ if (flat_labels[i] != blank) {
+ cpu_labels[k] = flat_labels[i];
+ k += 1;
+ }
+ }
+ }
+
+ virtual void Backward(const OpContext &ctx,
+ const std::vector &out_grad,
+ const std::vector &in_data,
+ const std::vector &out_data,
+ const std::vector &req,
+ const std::vector &in_grad,
+ const std::vector &aux_args) {
+ using namespace mshadow;
+ TBlob data = in_data[warpctc_enum::kData];
+ TBlob label = in_data[warpctc_enum::kLabel];
+ CHECK_EQ(data.shape_.ndim(), 2) << "input data shape should be 2 (t*n, p)";
+ ctcComputeInfo info;
+ if (data.dev_mask_ == cpu::kDevMask) {
+ info.loc = CTC_CPU;
+ info.num_threads = 1;
+ } else if (data.dev_mask_ == gpu::kDevMask) {
+#if MXNET_USE_CUDA
+ info.loc = CTC_GPU;
+ info.stream = ctx.get_stream()->stream_;
+#endif
+ } else {
+ LOG(FATAL) << "Unknown device type " << data.dev_mask_;
+ }
+
+ int T = param_.input_length;
+ int minibatch = data.shape_[0] / T;
+ int alphabet_size = data.shape_[1];
+ std::vector input_lengths;
+ for (int i = 0; i < minibatch; i++) {
+ input_lengths.push_back(T);
+ }
+
+#if MXNET_USE_CUDA
+ cudaError_t cuda_status;
+#endif
+ float* activations = static_cast(data.dptr_);
+ int* flat_labels = static_cast(label.dptr_);
+ int* cpu_raw_labels = flat_labels;
+ float* grads = static_cast(in_grad[warpctc_enum::kData].dptr_);
+ if (data.dev_mask_ == gpu::kDevMask) {
+#if MXNET_USE_CUDA
+ cpu_raw_labels = reinterpret_cast(malloc(sizeof(int) * label.Size()));
+ cuda_status = cudaMemcpyAsync(cpu_raw_labels, flat_labels,
+ label.Size()*sizeof(int),
+ cudaMemcpyDeviceToHost,
+ ctx.get_stream()->stream_);
+ CHECK_EQ(cuda_status, cudaSuccess) << "cuda memcpy label error";
+#endif
+ }
+
+ int total_label_length = 0;
+ std::vector label_lengths = labelLengths(cpu_raw_labels,
+ minibatch,
+ label.Size(),
+ 0, &total_label_length);
+ int* cpu_labels = reinterpret_cast(
+ malloc(sizeof(int) * total_label_length));
+ removeBlank(cpu_raw_labels, cpu_labels, label.Size(), 0);
+
+ size_t alloc_bytes;
+ throw_on_error(get_workspace_size(label_lengths.data(),
+ input_lengths.data(),
+ alphabet_size,
+ input_lengths.size(), info,
+ &alloc_bytes),
+ "Error: get_workspace_size in inf_test");
+ void* ctc_workspace;
+
+ if (data.dev_mask_ == cpu::kDevMask) {
+ ctc_workspace = malloc(alloc_bytes);
+ } else if (data.dev_mask_ == gpu::kDevMask) {
+#if MXNET_USE_CUDA
+ cuda_status = cudaMalloc(&ctc_workspace, alloc_bytes);
+ CHECK_EQ(cuda_status, cudaSuccess) << "cuda malloc worksapce fail";
+#endif
+ }
+ std::vector costs(minibatch);
+ throw_on_error(compute_ctc_loss(activations,
+ grads,
+ cpu_labels,
+ label_lengths.data(),
+ input_lengths.data(),
+ alphabet_size,
+ minibatch,
+ costs.data(),
+ ctc_workspace,
+ info),
+ "Error: compute_ctc_loss");
+
+ if (data.dev_mask_ == cpu::kDevMask) {
+ free(ctc_workspace);
+ free(cpu_labels);
+ } else if (data.dev_mask_ == gpu::kDevMask) {
+#if MXNET_USE_CUDA
+ cuda_status = cudaFree(ctc_workspace);
+ CHECK_EQ(cuda_status, cudaSuccess) << "cuda free workspace fail";
+ free(cpu_raw_labels);
+ free(cpu_labels);
+#endif
+ }
+ }
+};
+
+template
+Operator* CreateOp(WarpCTCParam type);
+
+
+#if DMLC_USE_CXX11
+class WarpCTCProp : public OperatorProperty {
+ public:
+ std::vector ListArguments() const override {
+ return {"data", "label"};
+ }
+
+ virtual std::vector ListOutputs() const {
+ return {"output"};
+ }
+
+ void Init(const std::vector >& kwargs)
+ override {
+ param_.Init(kwargs);
+ }
+
+ std::map GetParams() const override {
+ return param_.__DICT__();
+ }
+
+ bool InferShape(std::vector *in_shape,
+ std::vector *out_shape,
+ std::vector *aux_shape) const override {
+ using namespace mshadow;
+ CHECK_EQ(in_shape->size(), 2) << "Input:[data, label]";
+ const TShape &dshape = in_shape->at(0);
+ if (dshape.ndim() == 0) return false;
+ TShape label_shape(dshape.ndim() - 1);
+ label_shape[0] = param_.label_length * (dshape[0] / param_.input_length);
+ SHAPE_ASSIGN_CHECK(*in_shape, warpctc_enum::kLabel, label_shape);
+
+ out_shape->clear();
+ out_shape->push_back(dshape);
+ return true;
+ }
+
+ virtual bool InferType(std::vector *in_type,
+ std::vector *out_type,
+ std::vector *aux_type) const {
+ CHECK_LE(in_type->size(), this->ListArguments().size());
+ in_type->clear();
+ in_type->push_back(mshadow::kFloat32);
+ in_type->push_back(mshadow::kInt32);
+ out_type->clear();
+ out_type->push_back(mshadow::kFloat32);
+ return true;
+ }
+
+ OperatorProperty* Copy() const override {
+ auto ptr = new WarpCTCProp();
+ ptr->param_ = param_;
+ return ptr;
+ }
+
+ std::string TypeString() const override {
+ return "WarpCTC";
+ }
+
+
+ std::vector DeclareBackwardDependency(const std::vector &out_grad,
+ const std::vector &in_data,
+ const std::vector &out_data)
+ const override {
+ return {in_data[warpctc_enum::kData],
+ in_data[warpctc_enum::kLabel],
+ out_data[warpctc_enum::kOut]};
+ }
+
+ Operator* CreateOperator(Context ctx) const override;
+
+ private:
+ WarpCTCParam param_;
+};
+#endif // DMLC_USE_CXX11
+
+} // namespace op
+} // namespace mxnet
+
+#endif // PLUGIN_WARPCTC_WARPCTC_INL_H_
diff --git a/plugin/warpctc/warpctc.cc b/plugin/warpctc/warpctc.cc
new file mode 100644
index 000000000000..db88a3316c7e
--- /dev/null
+++ b/plugin/warpctc/warpctc.cc
@@ -0,0 +1,29 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file warpctc.cc
+ * \brief warpctc op
+ * \author Liang Xiang
+*/
+
+#include "./warpctc-inl.h"
+#include "../../src/operator/mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+template<>
+Operator *CreateOp(WarpCTCParam param) {
+ return new WarpCTCOp(param);
+}
+
+Operator *WarpCTCProp::CreateOperator(Context ctx) const {
+ DO_BIND_DISPATCH(CreateOp, param_);
+}
+
+DMLC_REGISTER_PARAMETER(WarpCTCParam);
+
+MXNET_REGISTER_OP_PROPERTY(WarpCTC, WarpCTCProp)
+.describe("warp ctc.")
+.add_arguments(WarpCTCParam::__FIELDS__());
+
+} // namespace op
+} // namespace mxnet
diff --git a/plugin/warpctc/warpctc.cu b/plugin/warpctc/warpctc.cu
new file mode 100644
index 000000000000..186c4d0c18f4
--- /dev/null
+++ b/plugin/warpctc/warpctc.cu
@@ -0,0 +1,19 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file warpctc.cc
+ * \brief warpctc op
+ * \author Liang Xiang
+*/
+#include "./warpctc-inl.h"
+#include
+#include "../../src/operator/mshadow_op.h"
+
+namespace mxnet {
+namespace op {
+template<>
+Operator *CreateOp(WarpCTCParam param) {
+ return new WarpCTCOp(param);
+}
+
+} // namespace op
+} // namespace mxnet
diff --git a/plugin/warpctc/warpctc.mk b/plugin/warpctc/warpctc.mk
new file mode 100644
index 000000000000..2223879ee3e3
--- /dev/null
+++ b/plugin/warpctc/warpctc.mk
@@ -0,0 +1,7 @@
+CFLAGS += -I$(WARPCTC_PATH)/include
+LDFLAGS += -L$(WARPCTC_PATH)/build -lwarpctc
+
+WARPCTC_SRC = $(wildcard plugin/warpctc/*.cc)
+PLUGIN_OBJ += $(patsubst %.cc, build/%.o, $(WARPCTC_SRC))
+WARPCTC_CUSRC = $(wildcard plugin/warpctc/*.cu)
+PLUGIN_CUOBJ += $(patsubst %.cu, build/%_gpu.o, $(WARPCTC_CUSRC))
diff --git a/ps-lite b/ps-lite
index 8aff164580f0..35ddccd4cd03 160000
--- a/ps-lite
+++ b/ps-lite
@@ -1 +1 @@
-Subproject commit 8aff164580f0e4ff81ad98038b6ec4ec02452ce8
+Subproject commit 35ddccd4cd0302f78ed2a05f1258860d4666e43c
diff --git a/python/mxnet/_ndarray_internal.py b/python/mxnet/_ndarray_internal.py
new file mode 100644
index 000000000000..cbe2bcd96220
--- /dev/null
+++ b/python/mxnet/_ndarray_internal.py
@@ -0,0 +1 @@
+"""NDArray namespace used to register internal functions"""
diff --git a/python/mxnet/_symbol_internal.py b/python/mxnet/_symbol_internal.py
new file mode 100644
index 000000000000..d798f8d3704a
--- /dev/null
+++ b/python/mxnet/_symbol_internal.py
@@ -0,0 +1 @@
+"""Symbol namespace used to register internal functions"""
diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py
index 8265eb39b52c..618d8cd4d783 100644
--- a/python/mxnet/callback.py
+++ b/python/mxnet/callback.py
@@ -8,22 +8,26 @@
import time
from .model import save_checkpoint
-def do_checkpoint(prefix):
+def do_checkpoint(prefix, period=1):
"""Callback to checkpoint the model to prefix every epoch.
Parameters
----------
prefix : str
The file prefix to checkpoint to
+ period : int
+ How many epochs to wait before checkpointing. Default is 1.
Returns
-------
callback : function
The callback function that can be passed as iter_end_callback to fit.
"""
+ period = int(max(1, period))
def _callback(iter_no, sym, arg, aux):
"""The checkpoint function."""
- save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
+ if (iter_no + 1) % period == 0:
+ save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
return _callback
diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py
index 13a5c3099864..32c6ec1748a4 100644
--- a/python/mxnet/executor.py
+++ b/python/mxnet/executor.py
@@ -98,19 +98,26 @@ def forward(self, is_train=False, **kwargs):
>>> # doing forward by not specifying things, but copy to the executor before hand
>>> mydata.copyto(texec.arg_dict['data'])
>>> texec.forward(is_train=True)
+ >>> # doing forward by specifying data and get outputs
+ >>> outputs = texec.forward(is_train=True, data=mydata)
+ >>> print(outputs[0].asnumpy())
"""
if len(kwargs) != 0:
arg_dict = self.arg_dict
for name, array in kwargs.items():
- if not isinstance(array, NDArray):
- raise ValueError('only accept keyword argument of NDArrays')
+ if not isinstance(array, (NDArray, np.ndarray)):
+ raise ValueError('only accept keyword argument of NDArrays and numpy.ndarray')
if name not in arg_dict:
raise TypeError('Unknown argument %s' % name)
- array.copyto(arg_dict[name])
+ if arg_dict[name].shape != array.shape:
+ raise ValueError('Shape not match! Argument %s, need: %s, received: %s'
+ %(name, str(arg_dict[name].shape), str(array.shape)))
+ arg_dict[name][:] = array
check_call(_LIB.MXExecutorForward(
self.handle,
ctypes.c_int(int(is_train))))
+ return self.outputs
def backward(self, out_grads=None):
"""Do backward pass to get the gradient of arguments.
diff --git a/python/mxnet/executor_manager.py b/python/mxnet/executor_manager.py
index 5a8f18e6612c..cc41691d342b 100644
--- a/python/mxnet/executor_manager.py
+++ b/python/mxnet/executor_manager.py
@@ -442,6 +442,7 @@ def load_data_batch(self, data_batch):
self.curr_execgrp = execgrp
else:
self.curr_execgrp = self.execgrp
+
self.curr_execgrp.load_data_batch(data_batch)
def forward(self, is_train=False):
diff --git a/python/mxnet/initializer.py b/python/mxnet/initializer.py
index 3d59443419e6..47aa0bd3a7b9 100644
--- a/python/mxnet/initializer.py
+++ b/python/mxnet/initializer.py
@@ -1,4 +1,5 @@
# coding: utf-8
+# pylint: disable=too-many-branches
"""Initialization helper for mxnet"""
from __future__ import absolute_import
@@ -29,6 +30,10 @@ def __call__(self, name, arr):
raise TypeError('arr must be NDArray')
if name.startswith('upsampling'):
self._init_bilinear(name, arr)
+ elif name.startswith('stn_loc') and name.endswith('weight'):
+ self._init_zero(name, arr)
+ elif name.startswith('stn_loc') and name.endswith('bias'):
+ self._init_loc_bias(name, arr)
elif name.endswith('bias'):
self._init_bias(name, arr)
elif name.endswith('gamma'):
@@ -59,6 +64,11 @@ def _init_bilinear(self, _, arr):
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
arr[:] = weight.reshape(shape)
+ def _init_loc_bias(self, _, arr):
+ shape = arr.shape
+ assert(shape[0] == 6)
+ arr[:] = np.array([1.0, 0, 0, 0, 1.0, 0])
+
def _init_zero(self, _, arr):
arr[:] = 0.0
@@ -82,6 +92,7 @@ def _init_default(self, name, _):
raise ValueError('Unknown initialization pattern for %s' % name)
# pylint: enable=no-self-use, missing-docstring, invalid-name
+
class Load(object):
"""Initialize by loading pretrained param from file or dict
@@ -124,6 +135,7 @@ def __call__(self, name, arr):
if self.verbose:
logging.info('Initialized %s by default', name)
+
class Mixed(object):
"""Initialize with mixed Initializer
@@ -176,6 +188,7 @@ def __init__(self, sigma=0.01):
def _init_weight(self, _, arr):
random.normal(0, self.sigma, out=arr)
+
class Orthogonal(Initializer):
"""Intialize weight as Orthogonal matrix
@@ -255,3 +268,19 @@ def _init_weight(self, _, arr):
random.normal(0, scale, out=arr)
else:
raise ValueError("Unknown random type")
+
+class MSRAPrelu(Xavier):
+ """Initialize the weight with initialization scheme from
+ Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification.
+
+ Parameters
+ ----------
+ factor_type: str, optional
+ Use ```avg```, ```in```, or ```out``` to init
+
+ slope: float, optional
+ initial slope of any PReLU (or similar) nonlinearities.
+ """
+ def __init__(self, factor_type="avg", slope=0.25):
+ magnitude = 2. / (1 + slope ** 2)
+ super(MSRAPrelu, self).__init__("gaussian", factor_type, magnitude)
diff --git a/python/mxnet/io.py b/python/mxnet/io.py
index 382db89a448a..532c6d12ebf2 100644
--- a/python/mxnet/io.py
+++ b/python/mxnet/io.py
@@ -16,6 +16,7 @@
from .base import check_call, ctypes2docstring
from .ndarray import NDArray
from .ndarray import array
+from .ndarray import concatenate
class DataBatch(object):
@@ -307,11 +308,11 @@ def _init_data(data, allow_empty, default_name):
raise TypeError("Input must be NDArray, numpy.ndarray, " + \
"a list of them or dict with them as values")
for k, v in data.items():
- if isinstance(v, NDArray):
- data[k] = v.asnumpy()
- for k, v in data.items():
- if not isinstance(v, np.ndarray):
- raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) + \
+ if not isinstance(v, NDArray):
+ try:
+ data[k] = array(v)
+ except:
+ raise TypeError(("Invalid type '%s' for %s, " % (type(v), k)) + \
"should be NDArray or numpy.ndarray")
return list(data.items())
@@ -348,8 +349,8 @@ def __init__(self, data, label=None, batch_size=1, shuffle=False, last_batch_han
if shuffle:
idx = np.arange(self.data[0][1].shape[0])
np.random.shuffle(idx)
- self.data = [(k, v[idx]) for k, v in self.data]
- self.label = [(k, v[idx]) for k, v in self.label]
+ self.data = [(k, array(v.asnumpy()[idx], v.context)) for k, v in self.data]
+ self.label = [(k, array(v.asnumpy()[idx], v.context)) for k, v in self.label]
self.data_list = [x[1] for x in self.data] + [x[1] for x in self.label]
self.num_source = len(self.data_list)
@@ -411,11 +412,10 @@ def _getdata(self, data_source):
"""Load data from underlying arrays, internal use only"""
assert(self.cursor < self.num_data), "DataIter needs reset."
if self.cursor + self.batch_size <= self.num_data:
- return [array(x[1][self.cursor:self.cursor+self.batch_size]) for x in data_source]
+ return [x[1][self.cursor:self.cursor+self.batch_size] for x in data_source]
else:
pad = self.batch_size - self.num_data + self.cursor
- return [array(np.concatenate((x[1][self.cursor:], x[1][:pad]),
- axis=0)) for x in data_source]
+ return [concatenate([x[1][self.cursor:], x[1][:pad]]) for x in data_source]
def getdata(self):
return self._getdata(self.data)
diff --git a/python/mxnet/libinfo.py b/python/mxnet/libinfo.py
index 6cf82d8ae3ff..54b49c4290b7 100644
--- a/python/mxnet/libinfo.py
+++ b/python/mxnet/libinfo.py
@@ -14,7 +14,8 @@ def find_lib_path():
"""
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
api_path = os.path.join(curr_path, '../../lib/')
- dll_path = [curr_path, api_path]
+ cmake_build_path = os.path.join(curr_path, '../../build/Release/')
+ dll_path = [curr_path, api_path, cmake_build_path]
if os.name == 'nt':
vs_configuration = 'Release'
if platform.architecture()[0] == '64bit':
diff --git a/python/mxnet/model.py b/python/mxnet/model.py
index 6d8204e6f23d..614f01813505 100644
--- a/python/mxnet/model.py
+++ b/python/mxnet/model.py
@@ -232,6 +232,7 @@ def _train_multi_device(symbol, ctx, arg_names, param_names, aux_names,
do_reset = True
for data_batch in train_data:
executor_manager.load_data_batch(data_batch)
+
if monitor is not None:
monitor.tic()
@@ -273,7 +274,7 @@ def _train_multi_device(symbol, ctx, arg_names, param_names, aux_names,
do_reset = False
break
- if do_reset is True:
+ if do_reset == True:
logger.info('Epoch[%d] Resetting Data Iterator', epoch)
train_data.reset()
@@ -315,6 +316,7 @@ def _train_multi_device(symbol, ctx, arg_names, param_names, aux_names,
name_value = eval_metric.get_name_value()
for name, value in name_value:
logger.info('Epoch[%d] Validation-%s=%f', epoch, name, value)
+ eval_data.reset()
# end of all epochs
return
diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py
index 5549965ca702..d73070284fe4 100644
--- a/python/mxnet/module/base_module.py
+++ b/python/mxnet/module/base_module.py
@@ -1,4 +1,4 @@
-# pylint: disable=too-many-arguments, too-many-locals, too-many-public-methods
+# pylint: disable=too-many-arguments, too-many-locals, too-many-public-methods, too-many-branches
"""`BaseModule` defines an API for modules."""
import logging
@@ -276,7 +276,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
eval_batch_end_callback=None, initializer=Uniform(0.01),
arg_params=None, aux_params=None, allow_missing=False,
force_rebind=False, force_init=False, begin_epoch=0, num_epoch=None,
- validation_metric=None):
+ validation_metric=None, monitor=None):
"""Train the module parameters.
Parameters
@@ -327,11 +327,12 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
num_epoch : int
Number of epochs to run training.
"""
-
assert num_epoch is not None, 'please specify number of epochs'
self.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label,
for_training=True, force_rebind=force_rebind)
+ if monitor is not None:
+ self.install_monitor(monitor)
self.init_params(initializer=initializer, arg_params=arg_params, aux_params=aux_params,
allow_missing=allow_missing, force_init=force_init)
self.init_optimizer(kvstore=kvstore, optimizer=optimizer,
@@ -349,10 +350,15 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
tic = time.time()
eval_metric.reset()
for nbatch, data_batch in enumerate(train_data):
+ if monitor is not None:
+ monitor.tic()
self.forward_backward(data_batch)
self.update()
self.update_metric(eval_metric, data_batch.label)
+ if monitor is not None:
+ monitor.toc_print()
+
if batch_end_callback is not None:
batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch,
eval_metric=eval_metric,
@@ -452,7 +458,7 @@ def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=Non
"""
raise NotImplementedError()
- def set_params(self, arg_params, aux_params):
+ def set_params(self, arg_params, aux_params, allow_missing=False, force_init=True):
"""Assign parameter and aux state values.
Parameters
@@ -461,9 +467,15 @@ def set_params(self, arg_params, aux_params):
Dictionary of name to value (`NDArray`) mapping.
aux_params : dict
Dictionary of name to value (`NDArray`) mapping.
+ allow_missing : bool
+ If true, params could contain missing values, and the initializer will be
+ called to fill those missing params.
+ force_init : bool
+ If true, will force re-initialize even if already initialized.
+
"""
self.init_params(initializer=None, arg_params=arg_params, aux_params=aux_params,
- allow_missing=False, force_init=True)
+ allow_missing=allow_missing, force_init=force_init)
def save_params(self, fname):
"""Save model parameters to file.
@@ -499,6 +511,10 @@ def load_params(self, fname):
raise ValueError("Invalid param file " + fname)
self.set_params(arg_params, aux_params)
+ def install_monitor(self, mon):
+ """Install monitor on all executors"""
+ raise NotImplementedError()
+
################################################################################
# Computations
################################################################################
diff --git a/python/mxnet/module/bucketing_module.py b/python/mxnet/module/bucketing_module.py
index 7ab039ea45d4..94f47948415e 100644
--- a/python/mxnet/module/bucketing_module.py
+++ b/python/mxnet/module/bucketing_module.py
@@ -199,7 +199,7 @@ def switch_bucket(self, bucket_key, data_shapes, label_shapes=None):
Typically `data_batch.provide_label`.
"""
assert self.binded, 'call bind before switching bucket'
- if not self._buckets.has_key(bucket_key):
+ if not bucket_key in self._buckets:
symbol, data_names, label_names = self._sym_gen(bucket_key)
module = Module(symbol, data_names, label_names,
logger=self.logger, context=self._context,
@@ -236,7 +236,7 @@ def init_optimizer(self, kvstore='local', optimizer='sgd',
self._curr_module.init_optimizer(kvstore, optimizer, optimizer_params,
force_init=force_init)
- for mod in self._buckets.itervalues():
+ for mod in self._buckets.values():
if mod is not self._curr_module:
mod.borrow_optimizer(self._curr_module)
@@ -325,3 +325,9 @@ def symbol(self):
"""The symbol of the current bucket being used."""
assert self.binded
return self._curr_module.symbol
+
+ def install_monitor(self, mon):
+ """ Install monitor on all executors """
+ assert self.binded
+ for mod in self._buckets.values():
+ mod.install_monitor(mon)
diff --git a/python/mxnet/module/executor_group.py b/python/mxnet/module/executor_group.py
index f51d94df4799..62e815e487ec 100644
--- a/python/mxnet/module/executor_group.py
+++ b/python/mxnet/module/executor_group.py
@@ -57,10 +57,13 @@ class DataParallelExecutorGroup(object):
of the data/label inputs.
logger : Logger
Default is `logging`.
+ fixed_param_names: list of str
+ Indicate parameters to be fixed during training. Parameters in this list will not allocate
+ space for gradient, nor do gradient calculation.
"""
def __init__(self, symbol, contexts, workload, data_shapes, label_shapes, param_names,
for_training, inputs_need_grad, shared_group=None, input_types=None,
- logger=logging):
+ logger=logging, fixed_param_names=None):
self.param_names = param_names
self.arg_names = symbol.list_arguments()
self.aux_names = symbol.list_auxiliary_states()
@@ -75,6 +78,10 @@ def __init__(self, symbol, contexts, workload, data_shapes, label_shapes, param_
self.input_types = input_types
self.logger = logger
+ self.fixed_param_names = fixed_param_names
+ if self.fixed_param_names is None:
+ self.fixed_param_names = []
+
if shared_group is not None:
self.shared_data_arrays = shared_group.shared_data_arrays
else:
@@ -335,7 +342,7 @@ def _bind_ith_exec(self, i, data_shapes, label_shapes, shared_group):
grad_req = {}
for name in self.arg_names:
if self.for_training:
- if name in self.param_names:
+ if name in self.param_names and name not in self.fixed_param_names:
grad_req[name] = 'write'
elif name in data_names:
grad_req[name] = 'write' if self.inputs_need_grad else 'null'
@@ -424,3 +431,8 @@ def _sliced_shape(self, shapes, i):
"""
return [(k, tuple([self.slices[i].stop-self.slices[i].start] + list(v[1:])))
for k, v in shapes]
+
+ def install_monitor(self, mon):
+ """Install monitor on all executors"""
+ for exe in self.execs:
+ mon.install(exe)
diff --git a/python/mxnet/module/module.py b/python/mxnet/module/module.py
index 3156ac98948c..36f92f084881 100644
--- a/python/mxnet/module/module.py
+++ b/python/mxnet/module/module.py
@@ -33,9 +33,11 @@ class Module(BaseModule):
Default is `cpu()`.
work_load_list : list of number
Default `None`, indicating uniform workload.
+ fixed_param_names: list of str
+ Default `None`, indicating no network parameters are fixed.
"""
def __init__(self, symbol, data_names=('data',), label_names=('softmax_label',),
- logger=logging, context=ctx.cpu(), work_load_list=None):
+ logger=logging, context=ctx.cpu(), work_load_list=None, fixed_param_names=None):
super(Module, self).__init__(logger=logger)
if isinstance(context, ctx.Context):
@@ -54,6 +56,7 @@ def __init__(self, symbol, data_names=('data',), label_names=('softmax_label',),
arg_names = symbol.list_arguments()
input_names = data_names + label_names
self._param_names = [x for x in arg_names if x not in input_names]
+ self._fixed_param_names = fixed_param_names
self._aux_names = symbol.list_auxiliary_states()
self._data_names = data_names
self._label_names = label_names
@@ -169,15 +172,17 @@ def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=Non
def _impl(name, arr, cache):
"""Internal helper for parameter initialization"""
if cache is not None:
- if cache.has_key(name):
+ if name in cache:
cache_arr = cache[name]
# just in case the cached array is just the target itself
if cache_arr is not arr:
cache_arr.copyto(arr)
else:
- assert allow_missing
- initializer(name, arr)
+ if not allow_missing:
+ raise RuntimeError("%s is not presented" % name)
+ if initializer != None:
+ initializer(name, arr)
else:
initializer(name, arr)
@@ -253,14 +258,13 @@ def bind(self, data_shapes, label_shapes=None, for_training=True,
self._work_load_list, data_shapes,
label_shapes, self._param_names,
for_training, inputs_need_grad,
- shared_group, logger=self.logger)
-
+ shared_group, logger=self.logger,
+ fixed_param_names=self._fixed_param_names)
if shared_module is not None:
self.params_initialized = True
self._arg_params = shared_module._arg_params
self._aux_params = shared_module._aux_params
-
- if self.params_initialized:
+ elif self.params_initialized:
# if the parameters are already initialized, we are re-binding
# so automatically copy the already initialized params
self._exec_group.set_params(self._arg_params, self._aux_params)
@@ -449,3 +453,8 @@ def _sync_params_from_devices(self):
latest parameters from `self._arg_params` and `self._aux_params`.
"""
self._exec_group.get_params(self._arg_params, self._aux_params)
+
+ def install_monitor(self, mon):
+ """ Install monitor on all executors """
+ assert self.binded
+ self._exec_group.install_monitor(mon)
diff --git a/python/mxnet/module/python_module.py b/python/mxnet/module/python_module.py
index 09866d8948e4..ab9b952010a7 100644
--- a/python/mxnet/module/python_module.py
+++ b/python/mxnet/module/python_module.py
@@ -326,3 +326,7 @@ def get_input_grads(self, merge_multi_context=True):
"""
assert merge_multi_context == True
return [self._scores_grad]
+
+ def install_monitor(self, mon):
+ """Install monitor on all executors"""
+ raise NotImplementedError()
diff --git a/python/mxnet/module/sequential_module.py b/python/mxnet/module/sequential_module.py
index 75c499dfafc7..3e9ac3d49855 100644
--- a/python/mxnet/module/sequential_module.py
+++ b/python/mxnet/module/sequential_module.py
@@ -383,3 +383,9 @@ def update_metric(self, eval_metric, labels):
if meta.has_key(SequentialModule.META_TAKE_LABELS) and \
meta[SequentialModule.META_TAKE_LABELS]:
module.update_metric(eval_metric, labels)
+
+ def install_monitor(self, mon):
+ """ Install monitor on all executors """
+ assert self.binded
+ for module in self._modules:
+ module.install_monitor(mon)
diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py
index 12ad735ffc72..3d61f95b6b21 100644
--- a/python/mxnet/ndarray.py
+++ b/python/mxnet/ndarray.py
@@ -1,7 +1,8 @@
# coding: utf-8
-# pylint: disable= too-many-lines, redefined-builtin
+# pylint: disable= too-many-lines, redefined-builtin, protected-access
"""NDArray API of mxnet."""
from __future__ import absolute_import
+from __future__ import division
import ctypes
import warnings
@@ -15,7 +16,9 @@
from .base import ctypes2buffer
from .base import check_call, ctypes2docstring
from .context import Context
+from . import _ndarray_internal as _internal
+# pylint: disable= no-member
_DTYPE_NP_TO_MX = {
np.float32 : 0,
np.float64 : 1,
@@ -31,6 +34,7 @@
3 : np.uint8,
4 : np.int32
}
+# pylint: enable= no-member
def _new_empty_handle():
"""Return a new empty handle.
@@ -100,9 +104,9 @@ def __iadd__(self, other):
if not self.writable:
raise ValueError('trying to add to a readonly NDArray')
if isinstance(other, NDArray):
- return NDArray._plus(self, other, out=self)
+ return _internal._plus(self, other, out=self)
elif isinstance(other, numeric_types):
- return NDArray._plus_scalar(self, float(other), out=self)
+ return _internal._plus_scalar(self, float(other), out=self)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -116,9 +120,9 @@ def __isub__(self, other):
if not self.writable:
raise ValueError('trying to subtract from a readonly NDArray')
if isinstance(other, NDArray):
- return NDArray._minus(self, other, out=self)
+ return _internal._minus(self, other, out=self)
elif isinstance(other, numeric_types):
- return NDArray._minus_scalar(self, float(other), out=self)
+ return _internal._minus_scalar(self, float(other), out=self)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -129,15 +133,15 @@ def __mul__(self, other):
return multiply(self, other)
def __neg__(self):
- return NDArray._mul_scalar(self, -1.0)
+ return _internal._mul_scalar(self, -1.0)
def __imul__(self, other):
if not self.writable:
raise ValueError('trying to multiply to a readonly NDArray')
if isinstance(other, NDArray):
- return NDArray._mul(self, other, out=self)
+ return _internal._mul(self, other, out=self)
elif isinstance(other, numeric_types):
- return NDArray._mul_scalar(self, float(other), out=self)
+ return _internal._mul_scalar(self, float(other), out=self)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -154,14 +158,26 @@ def __idiv__(self, other):
if not self.writable:
raise ValueError('trying to divide from a readonly NDArray')
if isinstance(other, NDArray):
- return NDArray._div(self, other, out=self)
+ return _internal._div(self, other, out=self)
elif isinstance(other, numeric_types):
- return NDArray._div_scalar(self, float(other), out=self)
+ return _internal._div_scalar(self, float(other), out=self)
else:
raise TypeError('type %s not supported' % str(type(other)))
def __truediv__(self, other):
- return self.__div__(other)
+ return divide(self, other)
+
+ def __rtruediv__(self, other):
+ return divide(other, self)
+
+ def __itruediv__(self, other):
+ return self.__idiv__(other)
+
+ def __pow__(self, other):
+ return power(self, other)
+
+ def __rpow__(self, other):
+ return power(other, self)
def __getstate__(self):
this = self.__dict__.copy()
@@ -204,7 +220,7 @@ def __setitem__(self, in_slice, value):
if value.handle is not self.handle:
value.copyto(self)
elif isinstance(value, numeric_types):
- NDArray._set_value(float(value), out=self)
+ _internal._set_value(float(value), out=self)
elif isinstance(value, (np.ndarray, np.generic)):
self._sync_copyfrom(value)
else:
@@ -300,20 +316,19 @@ def broadcast_to(self, shape):
the broadcast shape
"""
cur_shape = self.shape
- err_str = 'operands could not be broadcast together with remapped shapes'\
- '[original->remapped]: {} and requested shape {}'.format(cur_shape, shape)
+ err_str = 'operands could not be broadcast together with remapped shapes' \
+ '[original->remapped]: {} and requested shape {}'.format(cur_shape, shape)
if len(shape) < len(cur_shape):
raise ValueError(err_str)
cur_shape = (1,) * (len(shape) - len(cur_shape)) + cur_shape
- cur_shape = np.array(cur_shape)
- shape = np.array(shape)
- broadcasting_axes = np.nonzero(cur_shape != shape)
- if (cur_shape[broadcasting_axes] != 1).any():
+ cur_shape_arr = np.array(cur_shape)
+ broadcasting_axes = np.nonzero(cur_shape_arr != np.array(shape))
+ if (cur_shape_arr[broadcasting_axes] != 1).any():
raise ValueError(err_str)
- ret = self.reshape(tuple(cur_shape))
- for axis in broadcasting_axes[0]:
- ret = broadcast_axis(ret, axis=axis, size=shape[axis])
- return ret
+ if cur_shape != self.shape:
+ return broadcast_to(self.reshape(cur_shape), shape=shape)
+ else:
+ return broadcast_to(self, shape=tuple(shape))
# pylint: enable= undefined-variable
def wait_to_read(self):
@@ -454,10 +469,10 @@ def copyto(self, other):
warnings.warn('copy an array to itself, is it intended?',
RuntimeWarning)
return
- return NDArray._copyto(self, out=other)
+ return _internal._copyto(self, out=other)
elif isinstance(other, Context):
hret = NDArray(_new_alloc_handle(self.shape, other, True, self.dtype))
- return NDArray._copyto(self, out=hret)
+ return _internal._copyto(self, out=hret)
else:
raise TypeError('copyto do not support type ' + str(type(other)))
@@ -509,7 +524,7 @@ def onehot_encode(indices, out):
Same as out.
"""
# pylint: disable= no-member, protected-access
- return NDArray._onehot_encode(indices, out, out=out)
+ return _internal._onehot_encode(indices, out, out=out)
# pylint: enable= no-member, protected-access
@@ -535,40 +550,85 @@ def empty(shape, ctx=None, dtype=mx_real_t):
ctx = Context.default_ctx
return NDArray(handle=_new_alloc_handle(shape, ctx, False, dtype))
-def add(lhs, rhs):
- """ Perform element-wise addition
+#pylint: disable= too-many-arguments, no-member, protected-access
+def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None):
+ """ Helper function for element-wise operation
+ The function will perform numpy-like broadcasting if needed and call different functions
Parameters
----------
- lhs : Array or float value
- left hand side operand
+ lhs : NDArray or numeric value
+ left hand side operand
- rhs : Array of float value
+ rhs : NDArray or numeric value
right hand side operand
+ fn_array : function
+ function to be called if both lhs and rhs are of NDArray type
+
+ fn_scalar : function
+ function to be called if both lhs and rhs are numeric values
+
+ lfn_scalar : function
+ function to be called if lhs is NDArray while rhs is numeric value
+
+ rfn_scalar : function
+ function to be called if lhs is numeric value while rhs is NDArray;
+ if None is provided, then the function is commutative, so rfn_scalar is equal to lfn_scalar
+
Returns
-------
- out: Array
+ out: NDArray
result array
"""
- # pylint: disable= no-member, protected-access
if isinstance(lhs, numeric_types):
if isinstance(rhs, numeric_types):
- return lhs + rhs
+ return fn_scalar(lhs, rhs)
else:
- return add(rhs, lhs)
+ if rfn_scalar is None:
+ # commutative function
+ return lfn_scalar(rhs, float(lhs))
+ else:
+ return rfn_scalar(rhs, float(lhs))
elif isinstance(rhs, numeric_types):
- return NDArray._plus_scalar(lhs, float(rhs))
+ return lfn_scalar(lhs, float(rhs))
elif isinstance(rhs, NDArray):
+ # check whether broadcasting is needed
lsize = functools.reduce(operator.mul, lhs.shape)
rsize = functools.reduce(operator.mul, rhs.shape)
if lsize < rsize:
lhs = lhs.broadcast_to(rhs.shape)
elif lsize > rsize:
rhs = rhs.broadcast_to(lhs.shape)
- return NDArray._plus(lhs, rhs)
+ return fn_array(lhs, rhs)
else:
raise TypeError('type %s not supported' % str(type(rhs)))
+#pylint: enable= too-many-arguments, no-member, protected-access
+
+def add(lhs, rhs):
+ """ Perform element-wise addition
+
+ Parameters
+ ----------
+ lhs : Array or float value
+ left hand side operand
+
+ rhs : Array or float value
+ right hand side operand
+
+ Returns
+ -------
+ out: Array
+ result array
+ """
+ # pylint: disable= no-member, protected-access
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._plus,
+ operator.add,
+ _internal._plus_scalar,
+ None)
# pylint: enable= no-member, protected-access
def subtract(lhs, rhs):
@@ -588,25 +648,13 @@ def subtract(lhs, rhs):
result array
"""
# pylint: disable= no-member, protected-access
- if isinstance(lhs, numeric_types):
- if isinstance(rhs, numeric_types):
- return lhs - rhs
- elif isinstance(rhs, NDArray):
- return NDArray._rminus_scalar(rhs, float(lhs))
- else:
- raise TypeError('type %s not supported' % str(type(rhs)))
- elif isinstance(rhs, numeric_types):
- return NDArray._minus_scalar(lhs, float(rhs))
- elif isinstance(rhs, NDArray):
- lsize = functools.reduce(operator.mul, lhs.shape)
- rsize = functools.reduce(operator.mul, rhs.shape)
- if lsize < rsize:
- lhs = lhs.broadcast_to(rhs.shape)
- elif lsize > rsize:
- rhs = rhs.broadcast_to(lhs.shape)
- return NDArray._minus(lhs, rhs)
- else:
- raise TypeError('type %s not supported' % str(type(rhs)))
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._minus,
+ operator.sub,
+ _internal._minus_scalar,
+ _internal._rminus_scalar)
# pylint: enable= no-member, protected-access
def multiply(lhs, rhs):
@@ -626,23 +674,13 @@ def multiply(lhs, rhs):
result array
"""
# pylint: disable= no-member, protected-access
- if isinstance(lhs, numeric_types):
- if isinstance(rhs, numeric_types):
- return lhs * rhs
- else:
- return multiply(rhs, lhs)
- elif isinstance(rhs, numeric_types):
- return NDArray._mul_scalar(lhs, float(rhs))
- elif isinstance(rhs, NDArray):
- lsize = functools.reduce(operator.mul, lhs.shape)
- rsize = functools.reduce(operator.mul, rhs.shape)
- if lsize < rsize:
- lhs = lhs.broadcast_to(rhs.shape)
- elif lsize > rsize:
- rhs = rhs.broadcast_to(lhs.shape)
- return NDArray._mul(lhs, rhs)
- else:
- raise TypeError('type %s not supported' % str(type(rhs)))
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._mul,
+ operator.mul,
+ _internal._mul_scalar,
+ None)
# pylint: enable= no-member, protected-access
def divide(lhs, rhs):
@@ -662,25 +700,91 @@ def divide(lhs, rhs):
result array
"""
# pylint: disable= no-member, protected-access
- if isinstance(lhs, numeric_types):
- if isinstance(rhs, numeric_types):
- return lhs / rhs
- elif isinstance(rhs, NDArray):
- return NDArray._rdiv_scalar(rhs, float(lhs))
- else:
- raise TypeError('type %s not supported' % str(type(rhs)))
- elif isinstance(rhs, numeric_types):
- return NDArray._div_scalar(lhs, float(rhs))
- elif isinstance(rhs, NDArray):
- lsize = functools.reduce(operator.mul, lhs.shape)
- rsize = functools.reduce(operator.mul, rhs.shape)
- if lsize < rsize:
- lhs = lhs.broadcast_to(rhs.shape)
- elif lsize > rsize:
- rhs = rhs.broadcast_to(lhs.shape)
- return NDArray._div(lhs, rhs)
- else:
- raise TypeError('type %s not supported' % str(type(rhs)))
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._div,
+ operator.truediv,
+ _internal._div_scalar,
+ _internal._rdiv_scalar)
+ # pylint: enable= no-member, protected-access
+
+def power(lhs, rhs):
+ """ Perform power operator
+
+ Parameters
+ ----------
+ lhs : Array or float value
+ left hand side operand
+
+ rhs : Array or float value
+ right hand side operand
+
+ Returns
+ -------
+ out: Array
+ result array
+ """
+ # pylint: disable= no-member, protected-access
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._power,
+ operator.pow,
+ _internal._power_scalar,
+ _internal._rpower_scalar)
+ # pylint: enable= no-member, protected-access
+
+def maximum(lhs, rhs):
+ """ Perform maximum operator
+
+ Parameters
+ ----------
+ lhs : Array or float value
+ left hand side operand
+
+ rhs : Array or float value
+ right hand side operand
+
+ Returns
+ -------
+ out: Array
+ result array
+ """
+ # pylint: disable= no-member, protected-access
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._maximum,
+ lambda x, y: x if x > y else y,
+ _internal._maximum_scalar,
+ None)
+ # pylint: enable= no-member, protected-access
+
+def minimum(lhs, rhs):
+ """ Perform minimum operator
+
+ Parameters
+ ----------
+ lhs : Array or float value
+ left hand side operand
+
+ rhs : Array or float value
+ right hand side operand
+
+ Returns
+ -------
+ out: Array
+ result array
+ """
+ # pylint: disable= no-member, protected-access
+ return _ufunc_helper(
+ lhs,
+ rhs,
+ _internal._minimum,
+ lambda x, y: x if x < y else y,
+ _internal._minimum_scalar,
+ None)
# pylint: enable= no-member, protected-access
def true_divide(lhs, rhs):
@@ -731,125 +835,6 @@ def ones(shape, ctx=None, dtype=mx_real_t):
arr[:] = 1.0
return arr
-# pylint: disable=too-many-locals, invalid-name, no-member, protected-access, undefined-variable
-# pylint: disable=too-many-branches
-def _reduce(arr, axis=None, keepdims=False, typ='sum'):
- """ Reduce the array along given axises. The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- arr : Array
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Array
- The reduced NDArray.
- """
- if 'sum' == typ:
- reduce_func = sum_axis
- elif 'max' == typ:
- reduce_func = max_axis
- elif 'min' == typ:
- reduce_func = min_axis
- else:
- raise TypeError('typ=\'%s\' is not supported.' % typ)
- ndim = len(arr.shape)
- if axis is None:
- axis = list(range(ndim))
- elif isinstance(axis, int):
- axis = [axis]
- elif isinstance(axis, tuple) or isinstance(axis, list):
- axis = list(axis)
- else:
- raise TypeError('\'%s\' object is not supported as axis.' % type(axis).__name__)
-
- if list(range(ndim)) == axis:
- ret = reduce_func(arr, axis=-1, keepdims=keepdims)
- if not keepdims:
- return ret.asnumpy()[0]
- else:
- return ret
- for i in axis:
- if not isinstance(i, int):
- raise TypeError('\'%s\' object cannot be interpreted as an integer' % type(i).__name__)
- axis = sorted([x if 0 <= x else x + ndim for x in axis])
- for i in axis:
- if i < 0 or ndim <= i:
- raise ValueError('\'axis\' entry is out of bounds')
- if len(set(axis)) != len(axis):
- raise ValueError('duplicate value in \'axis\'')
- assert(len(axis) != 0)
- ret = arr
- for i in reversed(axis):
- ret = reduce_func(ret, axis=i, keepdims=keepdims)
- return ret
-# pylint: enable=too-many-locals, invalid-name, no-member, protected-access, undefined-variable
-# pylint: enable=too-many-branches
-
-def sum(arr, axis=None, keepdims=False):
- """ Sum the array along given axises. The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- arr : Array
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Array
- The reduced NDArray.
- """
- return _reduce(arr=arr, axis=axis, keepdims=keepdims, typ='sum')
-
-def max(arr, axis=None, keepdims=False):
- """ Take the maximum of the array along given axises.
- The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- arr : Array
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Array
- The reduced NDArray.
- """
- return _reduce(arr=arr, axis=axis, keepdims=keepdims, typ='max')
-
-def min(arr, axis=None, keepdims=False):
- """ Take the minimum of the array along given axises.
- The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- arr : Array
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Array
- The reduced NDArray.
- """
- return _reduce(arr=arr, axis=axis, keepdims=keepdims, typ='min')
-
def full(shape, val, ctx=None):
"""Create a new NDArray filled with given value, with specified shape.
@@ -1042,24 +1027,24 @@ def imdecode(str_img, clip_rect=(0, 0, 0, 0), out=None, index=0, channels=3, mea
if mean is None:
mean = NDArray(_new_empty_handle())
if out is None:
- return NDArray._imdecode(mean, index,
- clip_rect[0],
- clip_rect[1],
- clip_rect[2],
- clip_rect[3],
- channels,
- len(str_img),
- str_img=str_img)
+ return _internal._imdecode(mean, index,
+ clip_rect[0],
+ clip_rect[1],
+ clip_rect[2],
+ clip_rect[3],
+ channels,
+ len(str_img),
+ str_img=str_img)
else:
- return NDArray._imdecode(mean, index,
- clip_rect[0],
- clip_rect[1],
- clip_rect[2],
- clip_rect[3],
- channels,
- len(str_img),
- str_img=str_img,
- out=out)
+ return _internal._imdecode(mean, index,
+ clip_rect[0],
+ clip_rect[1],
+ clip_rect[2],
+ clip_rect[3],
+ channels,
+ len(str_img),
+ str_img=str_img,
+ out=out)
# pylint: disable=too-many-locals, invalid-name
def _make_ndarray_function(handle):
@@ -1123,7 +1108,7 @@ def binary_ndarray_function(lhs, rhs, out=None, **kwargs):
"""Internal binary function
"""
if out:
- if isinstance(out, NDArray) == False:
+ if not isinstance(out, NDArray):
raise TypeError('out must be NDArray')
if not out.writable:
raise TypeError('out must be writable')
@@ -1144,7 +1129,7 @@ def binary_ndarray_function(lhs, rhs, out=None, **kwargs):
def unary_ndarray_function(src, out=None, *args, **kwargs):
"""internal NDArray function"""
if out:
- if isinstance(out, NDArray) == False:
+ if not isinstance(out, NDArray):
raise TypeError('out must be NDArray')
if not out.writable:
raise TypeError('out must be writable')
@@ -1225,12 +1210,13 @@ def _init_ndarray_module():
ctypes.byref(plist)))
module_obj = sys.modules[__name__]
+ module_internal = sys.modules["mxnet._ndarray_internal"]
for i in range(size.value):
hdl = FunctionHandle(plist[i])
function = _make_ndarray_function(hdl)
- # if function name starts with underscore, register as static method of NDArray
+ # if function name starts with underscore, register as internal namespace
if function.__name__.startswith('_'):
- setattr(NDArray, function.__name__, staticmethod(function))
+ setattr(module_internal, function.__name__, function)
else:
fname = function.__name__
fn_obj = getattr(module_obj, fname, None)
diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py
index 9e186eed926f..e9c0e385022d 100644
--- a/python/mxnet/operator.py
+++ b/python/mxnet/operator.py
@@ -211,10 +211,10 @@ def list_arguments_entry(out, _):
None, None, None, None, None)
cb_ptr = format(cast(pointer(self.info_), c_void_p).value, 'x')
# pylint: disable=E1101
- sym = symbol.Symbol._Native(*args,
- info=cb_ptr,
- need_top_grad=self.need_top_grad(),
- **kwargs)
+ sym = symbol._internal._Native(*args,
+ info=cb_ptr,
+ need_top_grad=self.need_top_grad(),
+ **kwargs)
# keep a reference of ourself in PythonOp so we don't get garbage collected.
PythonOp._ref_holder.append(self)
return sym
@@ -358,9 +358,9 @@ def declare_backward_dependency(out_grad, in_data, out_data, num_dep, deps, _):
None, None, None, None, None, None)
cb_ptr = format(cast(pointer(self.info_), c_void_p).value, 'x')
# pylint: disable=E1101
- sym = symbol.Symbol._NDArray(*args,
- info=cb_ptr,
- **kwargs)
+ sym = symbol._internal._NDArray(*args,
+ info=cb_ptr,
+ **kwargs)
# keep a reference of ourself in PythonOp so we don't get garbage collected.
PythonOp._ref_holder.append(self)
return sym
diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py
index 18c5da666843..94a84232f81f 100644
--- a/python/mxnet/optimizer.py
+++ b/python/mxnet/optimizer.py
@@ -5,7 +5,7 @@
from .base import _LIB, check_call
from .base import c_array, mx_uint, mx_float, c_str
from .base import OptimizerHandle, OptimizerCreator
-from .ndarray import NDArray, zeros, clip, sqrt
+from .ndarray import NDArray, zeros, clip, sqrt, square
from .random import normal
@@ -90,7 +90,7 @@ def _init_cc_optimizer(name, param_keys, param_vals):
def __init__(self, rescale_grad=1., param_idx2name=None, wd=0.,
clip_gradient=None, learning_rate=0.01,
- lr_scheduler=None, sym=None):
+ lr_scheduler=None, sym=None, begin_num_update=0):
self.rescale_grad = rescale_grad
self.lr = learning_rate
self.lr_scheduler = lr_scheduler
@@ -100,7 +100,8 @@ def __init__(self, rescale_grad=1., param_idx2name=None, wd=0.,
self.wd = wd
self.lr_mult = {}
self.wd_mult = {}
- self.num_update = 0
+ self.begin_num_update = begin_num_update
+ self.num_update = begin_num_update
self._index_update_count = {}
self.clip_gradient = clip_gradient
@@ -176,7 +177,7 @@ def _update_count(self, index):
The index will be updated
"""
if index not in self._index_update_count:
- self._index_update_count[index] = 0
+ self._index_update_count[index] = self.begin_num_update
self._index_update_count[index] += 1
self.num_update = max(self._index_update_count[index], self.num_update)
@@ -586,14 +587,17 @@ def update(self, index, weight, grad, state):
if self.clip_gradient is not None:
clip(grad, -self.clip_gradient, self.clip_gradient, out=grad)
- mean[:] = self.beta1 * mean + (1. - self.beta1) * grad
- variance[:] = self.beta2 * variance + (1. - self.beta2) * grad * grad
+ mean *= self.beta1
+ mean += grad * (1. - self.beta1)
+
+ variance *= self.beta2
+ variance += (1 - self.beta2) * square(grad, out=grad)
coef1 = 1. - self.beta1**t
coef2 = 1. - self.beta2**t
lr *= math.sqrt(coef2)/coef1
- weight[:] -= lr*mean/(sqrt(variance) + self.epsilon)
+ weight -= lr*mean/(sqrt(variance) + self.epsilon)
wd = self._get_wd(index)
if wd > 0.:
diff --git a/python/mxnet/random.py b/python/mxnet/random.py
index 489a8bd16097..b54e40d653bb 100644
--- a/python/mxnet/random.py
+++ b/python/mxnet/random.py
@@ -5,7 +5,8 @@
import ctypes
from .base import _LIB, check_call
-from .ndarray import NDArray, empty
+from .ndarray import empty
+from . import _ndarray_internal as _internal
def uniform(low, high, shape=None, ctx=None, out=None):
@@ -38,17 +39,17 @@ def uniform(low, high, shape=None, ctx=None, out=None):
if isinstance(shape, int):
shape = (shape,)
out = empty(shape, ctx)
- return NDArray._random_uniform(low, high, out=out)
+ return _internal._sample_uniform(low=low, high=high, shape=out.shape, out=out)
-def normal(mean, stdvar, shape=None, ctx=None, out=None):
+def normal(loc, scale, shape=None, ctx=None, out=None):
"""Generate normal(Gaussian) distribution N(mean, stdvar^2) with shape.
Parameters
----------
- mean : float
+ loc : float
The mean of the normal distribution.
- stdvar : float
+ scale : float
The standard deviation of normal distribution.
shape : tuple, optional
Output shape of the NDArray generated.
@@ -71,7 +72,7 @@ def normal(mean, stdvar, shape=None, ctx=None, out=None):
if isinstance(shape, int):
shape = (shape,)
out = empty(shape, ctx)
- return NDArray._random_gaussian(mean, stdvar, out=out)
+ return _internal._sample_normal(loc=loc, scale=scale, shape=out.shape, out=out)
def seed(seed_state):
@@ -96,4 +97,3 @@ def seed(seed_state):
raise ValueError('sd must be int')
seed_state = ctypes.c_int(int(seed_state))
check_call(_LIB.MXRandomSeed(seed_state))
-
diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py
index 2577b8f65c30..dfe1890bddea 100644
--- a/python/mxnet/symbol.py
+++ b/python/mxnet/symbol.py
@@ -19,7 +19,7 @@
from .ndarray import NDArray, zeros, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP
from .executor import Executor
from .symbol_doc import SymbolDoc
-
+from . import _symbol_internal as _internal
class Symbol(object):
"""Symbol is symbolic graph of the mxnet."""
@@ -37,9 +37,9 @@ def __init__(self, handle):
def __add__(self, other):
if isinstance(other, Symbol):
- return Symbol._Plus(self, other)
+ return _internal._Plus(self, other)
if isinstance(other, Number):
- return Symbol._PlusScalar(self, scalar=other)
+ return _internal._PlusScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -48,23 +48,23 @@ def __radd__(self, other):
def __sub__(self, other):
if isinstance(other, Symbol):
- return Symbol._Minus(self, other)
+ return _internal._Minus(self, other)
if isinstance(other, Number):
- return Symbol._MinusScalar(self, scalar=other)
+ return _internal._MinusScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
def __rsub__(self, other):
if isinstance(other, Number):
- return Symbol._RMinusScalar(self, scalar=other)
+ return _internal._RMinusScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
def __mul__(self, other):
if isinstance(other, Symbol):
- return Symbol._Mul(self, other)
+ return _internal._Mul(self, other)
if isinstance(other, Number):
- return Symbol._MulScalar(self, scalar=other)
+ return _internal._MulScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -73,15 +73,15 @@ def __rmul__(self, other):
def __div__(self, other):
if isinstance(other, Symbol):
- return Symbol._Div(self, other)
+ return _internal._Div(self, other)
if isinstance(other, Number):
- return Symbol._DivScalar(self, scalar=other)
+ return _internal._DivScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
def __rdiv__(self, other):
if isinstance(other, Number):
- return Symbol._RDivScalar(self, scalar=other)
+ return _internal._RDivScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -93,9 +93,9 @@ def __rtruediv__(self, other):
def __pow__(self, other):
if isinstance(other, Symbol):
- return Symbol._Power(self, other)
+ return _internal._Power(self, other)
if isinstance(other, Number):
- return Symbol._PowerScalar(self, scalar=other)
+ return _internal._PowerScalar(self, scalar=other)
else:
raise TypeError('type %s not supported' % str(type(other)))
@@ -1091,11 +1091,12 @@ def _init_symbol_module():
check_call(_LIB.MXSymbolListAtomicSymbolCreators(ctypes.byref(size),
ctypes.byref(plist)))
module_obj = sys.modules[__name__]
+ module_internal = sys.modules["mxnet._symbol_internal"]
for i in range(size.value):
hdl = SymbolHandle(plist[i])
function = _make_atomic_symbol_function(hdl)
if function.__name__.startswith('_'):
- setattr(Symbol, function.__name__, staticmethod(function))
+ setattr(module_internal, function.__name__, function)
else:
setattr(module_obj, function.__name__, function)
@@ -1118,91 +1119,17 @@ def pow(base, exp):
result: Symbol or Number
"""
if isinstance(base, Symbol) and isinstance(exp, Symbol):
- return Symbol._Power(base, exp)
+ return _internal._Power(base, exp)
if isinstance(base, Symbol) and isinstance(exp, Number):
- return Symbol._PowerScalar(base, scalar=exp)
+ return _internal._PowerScalar(base, scalar=exp)
if isinstance(base, Number) and isinstance(exp, Symbol):
- return Symbol._RPowerScalar(exp, scalar=base)
+ return _internal._RPowerScalar(exp, scalar=base)
if isinstance(base, Number) and isinstance(exp, Number):
return base**exp
else:
raise TypeError('types (%s, %s) not supported' % (str(type(base)), str(type(exp))))
-# pylint: disable= undefined-variable, too-many-branches
-def _reduce(data, axis=None, keepdims=False, name=None, typ='sum'):
- """ Reduce the array along given axis. The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- data : Symbol
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Symbol
- Symbol represents the reduced Array.
- """
- if 'sum' == typ:
- reduce_func = sum_axis
- else:
- raise TypeError('typ=\'%s\' is not supported.' % typ)
- if axis is None:
- ret = reduce_func(data, axis=-1, keepdims=keepdims, name=name)
- return ret
- elif isinstance(axis, int):
- axis = [axis]
- elif isinstance(axis, tuple) or isinstance(axis, list):
- axis = list(axis)
- else:
- raise TypeError('\'%s\' object is not supported as axis.' % type(axis).__name__)
-
- for i in axis:
- if not isinstance(i, int):
- raise TypeError('\'%s\' object cannot be interpreted as an integer' % type(i).__name__)
- axis = sorted(axis)
- for i in axis:
- if i < 0:
- raise ValueError('\'axis\' entry is out of bounds')
- if len(set(axis)) != len(axis):
- raise ValueError('duplicate value in \'axis\'')
- assert (len(axis) != 0)
- ret = data
- for (i, ele) in enumerate(reversed(axis)):
- if i == (len(axis) - 1):
- ret = reduce_func(ret, axis=ele, keepdims=keepdims, name=name)
- else:
- ret = reduce_func(ret, axis=ele, keepdims=keepdims)
- return ret
-# pylint: enable= undefined-variable, too-many-branches
-
-
-def sum(data, axis=None, keepdims=False, name=None):
- """ Calculate the sum of the array along given axis.
- The semantic strictly follows numpy's document.
-
- Parameters
- ----------
- data : Symbol
- the array to be reduced
- axis : int or list(int), optional
- along which axis to do reduction
- keepdims : bool
- whether the reduced axis should be kept in the final shape
-
- Returns
- -------
- out: Symbol
- Symbol represents the reduced Array.
- """
- return _reduce(data=data, axis=axis, keepdims=keepdims, name=name, typ='sum')
-
-
-
# pylint: disable=no-member
# pylint: disable=redefined-builtin
def maximum(left, right):
@@ -1218,11 +1145,11 @@ def maximum(left, right):
result: Symbol or Number
"""
if isinstance(left, Symbol) and isinstance(right, Symbol):
- return Symbol._Maximum(left, right)
+ return _internal._Maximum(left, right)
if isinstance(left, Symbol) and isinstance(right, Number):
- return Symbol._MaximumScalar(left, scalar=right)
+ return _internal._MaximumScalar(left, scalar=right)
if isinstance(left, Number) and isinstance(right, Symbol):
- return Symbol._MaximumScalar(right, scalar=left)
+ return _internal._MaximumScalar(right, scalar=left)
if isinstance(left, Number) and isinstance(right, Number):
return left if left > right else right
else:
@@ -1244,11 +1171,11 @@ def minimum(left, right):
result: Symbol or Number
"""
if isinstance(left, Symbol) and isinstance(right, Symbol):
- return Symbol._Minimum(left, right)
+ return _internal._Minimum(left, right)
if isinstance(left, Symbol) and isinstance(right, Number):
- return Symbol._MinimumScalar(left, scalar=right)
+ return _internal._MinimumScalar(left, scalar=right)
if isinstance(left, Number) and isinstance(right, Symbol):
- return Symbol._MinimumScalar(right, scalar=left)
+ return _internal._MinimumScalar(right, scalar=left)
if isinstance(left, Number) and isinstance(right, Number):
return left if left > right else right
else:
diff --git a/scala-package/core/pom.xml b/scala-package/core/pom.xml
index 1cd795978f68..5900a0a710a2 100644
--- a/scala-package/core/pom.xml
+++ b/scala-package/core/pom.xml
@@ -61,7 +61,10 @@
org.scalatest
scalatest-maven-plugin
- -Djava.library.path=${project.parent.basedir}/native/${platform}/target
+
+ -Djava.library.path=${project.parent.basedir}/native/${platform}/target \
+ -Dlog4j.configuration=file://${project.basedir}/src/test/resources/log4j.properties
+
diff --git a/scala-package/core/scripts/get_cifar_data.sh b/scala-package/core/scripts/get_cifar_data.sh
index 48c4bfde2225..4b59e2c2ad4b 100755
--- a/scala-package/core/scripts/get_cifar_data.sh
+++ b/scala-package/core/scripts/get_cifar_data.sh
@@ -5,7 +5,7 @@ fi
cifar_data_path="./data/cifar10.zip"
if [ ! -f "$cifar_data_path" ]; then
- wget http://webdocs.cs.ualberta.ca/~bx3/data/cifar10.zip -P $data_path
+ wget http://data.dmlc.ml/mxnet/data/cifar10.zip -P $data_path
cd $data_path
unzip -u cifar10.zip
-fi
\ No newline at end of file
+fi
diff --git a/scala-package/core/scripts/get_mnist_data.sh b/scala-package/core/scripts/get_mnist_data.sh
index e080144f6663..359e399629cc 100755
--- a/scala-package/core/scripts/get_mnist_data.sh
+++ b/scala-package/core/scripts/get_mnist_data.sh
@@ -5,7 +5,7 @@ fi
mnist_data_path="./data/mnist.zip"
if [ ! -f "$mnist_data_path" ]; then
- wget http://webdocs.cs.ualberta.ca/~bx3/data/mnist.zip -P $data_path
+ wget http://data.dmlc.ml/mxnet/data/mnist.zip -P $data_path
cd $data_path
unzip -u mnist.zip
fi
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Base.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Base.scala
index d995176f1796..cf3bee93a98a 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Base.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Base.scala
@@ -43,7 +43,8 @@ object Base {
"Copying native library from the archive. " +
"Consider installing the library somewhere in the path " +
"(for Windows: PATH, for Linux: LD_LIBRARY_PATH), " +
- "or specifying by Java cmd option -Djava.library.path=[lib path].")
+ "or specifying by Java cmd option -Djava.library.path=[lib path]." +
+ "Exception:", e)
NativeLibraryLoader.loadLibrary("mxnet-scala")
}
} catch {
@@ -69,6 +70,7 @@ object Base {
System.loadLibrary(libname)
} catch {
case e: UnsatisfiedLinkError =>
+ logger.warn("Failed to load from native path. Exception:", e)
val os = System.getProperty("os.name")
// ref: http://lopica.sourceforge.net/os.html
if (os.startsWith("Linux")) {
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/EvalMetric.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/EvalMetric.scala
index 4dcc52e57efa..35aa2eef6ada 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/EvalMetric.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/EvalMetric.scala
@@ -59,6 +59,7 @@ class Accuracy extends EvalMetric("accuracy") {
}
}
this.numInst += predLabel.shape(0)
+ predLabel.dispose()
}
}
}
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Executor.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Executor.scala
index 523093c975f4..281f208843bb 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Executor.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Executor.scala
@@ -74,7 +74,9 @@ object Executor {
targets: Seq[Array[(Int, Int, NDArray)]]): Unit = {
for ((src, dTargets) <- data zip targets) {
for ((start, end, dst) <- dTargets) {
- src.slice(start, end).copyTo(dst)
+ val sliced = src.slice(start, end)
+ sliced.copyTo(dst)
+ sliced.dispose()
}
}
}
@@ -404,7 +406,7 @@ class DataParallelExecutorManager(symbol: Symbol,
private val trainExecs =
ctx.zipWithIndex.map { case (context, i) =>
val dataShapes =
- trainData.provideData.map { case (name: String, shape: Shape) =>
+ (trainData.provideData ++ trainData.provideLabel).map { case (name: String, shape: Shape) =>
(name, Shape(slices(i)._2 - slices(i)._1) ++ shape.drop(1))
}
symbol.simpleBind(context, "write", shapeDict = dataShapes)
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala
index e11d85fe7cb7..98ce1953243d 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala
@@ -39,6 +39,13 @@ class LibInfo {
useVars: Array[NDArrayHandle],
scalarArgs: Array[MXFloat],
mutateVars: Array[NDArrayHandle]): Int
+ @native def mxFuncInvokeEx(function: FunctionHandle,
+ useVars: Array[NDArrayHandle],
+ scalarArgs: Array[MXFloat],
+ mutateVars: Array[NDArrayHandle],
+ numParams: Int,
+ paramKeys: Array[Array[Byte]],
+ paramVals: Array[Array[Byte]]): Int
@native def mxNDArrayGetShape(handle: NDArrayHandle,
ndim: MXUintRef,
data: ArrayBuffer[Int]): Int
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala
index 7f1948b45d31..6807d6a72f31 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala
@@ -55,12 +55,12 @@ object NDArray {
if (output == null) {
require(acceptEmptyMutate, s"argument out is required to call $funcName")
output = new NDArray(newEmptyHandle())
+ addDependency(Array(lhs, rhs), Array(output))
}
checkCall(_LIB.mxFuncInvoke(handle,
Array(lhs.handle, rhs.handle),
Array[MXFloat](),
Array(output.handle)))
- addDependency(Array(lhs, rhs), Array(output))
case _ => throw new IllegalArgumentException(s"call $funcName as binary function")
}
output
@@ -76,12 +76,12 @@ object NDArray {
if (output == null) {
require(acceptEmptyMutate, s"argument out is required to call $funcName")
output = new NDArray(newEmptyHandle())
+ addDependency(Array(src), Array(output))
}
checkCall(_LIB.mxFuncInvoke(handle,
Array(src.handle),
Array[MXFloat](),
Array(output.handle)))
- addDependency(Array(src), Array(output))
case _ => throw new IllegalArgumentException(s"call $funcName as unary function")
}
output
@@ -91,14 +91,28 @@ object NDArray {
* Invoke this function by passing in parameters
*
* @param args Positional arguments of input scalars and NDArray
- * @param out NDArray or tuple of NDArray, optional
+ * @param kwargs Key-value arguments for functions, e.g.,
+ * out: NDArray or Array[NDArray], optional
* Output NDArray, used to hold the output result.
* @return The result NDArray(tuple) of result of computation.
*/
def invokeGenericFunc(funcName: String,
- args: Array[Any],
- out: Array[NDArray] = null): Array[NDArray] = {
- var mutateVars = out
+ args: Array[Any] = null,
+ kwargs: Map[String, Any] = null): Array[NDArray] = {
+ var mutateVars: Array[NDArray] = null
+ val realKwargs =
+ if (kwargs != null && kwargs.contains("out")) {
+ val out = kwargs("out")
+ mutateVars =
+ if (out.isInstanceOf[NDArray]) {
+ Array(kwargs("out").asInstanceOf[NDArray])
+ } else {
+ kwargs("out").asInstanceOf[Array[NDArray]]
+ }
+ kwargs - "out"
+ } else {
+ kwargs
+ }
val function = functions(funcName)
require(function != null, s"invalid function name $funcName")
function match {
@@ -109,17 +123,28 @@ object NDArray {
scalarRange: Range) =>
require(mutateVars == null || nMutateVars == mutateVars.length,
s"expect $nMutateVars in $funcName")
+ val useVars = useVarsRange.map(args(_).asInstanceOf[NDArray]).toArray
+ val scalarVars = scalarRange.map(args(_).asInstanceOf[MXFloat]).toArray
if (mutateVars == null) {
require(acceptEmptyMutate, s"argument out is required to call $funcName")
mutateVars = Array.fill[NDArray](nMutateVars)(new NDArray(newEmptyHandle()))
+ addDependency(useVars, mutateVars)
}
- val useVars = useVarsRange.map(args(_).asInstanceOf[NDArray]).toArray
- val scalarVars = scalarRange.map(args(_).asInstanceOf[MXFloat]).toArray
- checkCall(_LIB.mxFuncInvoke(handle,
+ val (numKwargs: Int,
+ kwargKeys: Option[Array[Array[Byte]]],
+ kwargVals: Option[Array[Array[Byte]]]) =
+ if (realKwargs == null) {
+ (0, None, None)
+ } else {
+ (realKwargs.size,
+ Some(realKwargs.keys.map(_.getBytes("ASCII") ++ Array(0.toByte)).toArray),
+ Some(realKwargs.values.map(_.toString.getBytes("ASCII") ++ Array(0.toByte)).toArray))
+ }
+ checkCall(_LIB.mxFuncInvokeEx(handle,
useVars.map(_.handle),
scalarVars,
- mutateVars.map(_.handle).array))
- addDependency(useVars, mutateVars)
+ mutateVars.map(_.handle).array,
+ numKwargs, kwargKeys.orNull, kwargVals.orNull))
case _ => throw new IllegalArgumentException(s"call $funcName as generic function")
}
mutateVars
@@ -464,11 +489,15 @@ object NDArray {
}
def randomUniform(low: Float, high: Float, out: NDArray): NDArray = {
- NDArray.invokeGenericFunc("_random_uniform", Array(low, high), Array(out))(0)
+ require(out != null)
+ NDArray.invokeGenericFunc("_sample_uniform", kwargs = Map[String, Any](
+ "low" -> low, "high" -> high, "shape" -> out.shape, "out" -> out))(0)
}
- def randomGaussian(mean: Float, stdvar: Float, out: NDArray): NDArray = {
- NDArray.invokeGenericFunc("_random_gaussian", Array(mean, stdvar), Array(out))(0)
+ def randomGaussian(loc: Float, scale: Float, out: NDArray): NDArray = {
+ require(out != null)
+ NDArray.invokeGenericFunc("_sample_normal", kwargs = Map[String, Any](
+ "loc" -> loc, "scale" -> scale, "shape" -> out.shape, "out" -> out))(0)
}
/**
@@ -743,7 +772,7 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
*/
def set(value: Float): NDArray = {
require(writable, "trying to assign to a readonly NDArray")
- NDArray.invokeGenericFunc("_set_value", Array[Any](value), out = Array(this))
+ NDArray.invokeGenericFunc("_set_value", Array[Any](value), Map[String, Any]("out" -> this))
this
}
@@ -777,7 +806,8 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
if (!writable) {
throw new IllegalArgumentException("trying to add to a readonly NDArray")
}
- NDArray.invokeGenericFunc("_plus_scalar", Array[Any](this, other), out = Array(this))
+ NDArray.invokeGenericFunc("_plus_scalar", Array[Any](this, other),
+ Map[String, Any]("out" -> this))
this
}
@@ -800,7 +830,8 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
if (!writable) {
throw new IllegalArgumentException("trying to subtract from a readonly NDArray")
}
- NDArray.invokeGenericFunc("_minus_scalar", Array[Any](this, other), out = Array(this))
+ NDArray.invokeGenericFunc("_minus_scalar", Array[Any](this, other),
+ Map[String, Any]("out" -> this))
this
}
@@ -827,7 +858,8 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
if (!writable) {
throw new IllegalArgumentException("trying to multiply to a readonly NDArray")
}
- NDArray.invokeGenericFunc("_mul_scalar", Array[Any](this, other), out = Array(this))
+ NDArray.invokeGenericFunc("_mul_scalar", Array[Any](this, other),
+ Map[String, Any]("out" -> this))
this
}
@@ -850,7 +882,8 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
if (!writable) {
throw new IllegalArgumentException("trying to divide from a readonly NDArray")
}
- NDArray.invokeGenericFunc("_div_scalar", Array[Any](this, other), out = Array(this))
+ NDArray.invokeGenericFunc("_div_scalar", Array[Any](this, other),
+ Map[String, Any]("out" -> this))
this
}
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala
index 7b456b0b4bb2..7c233b1c8988 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala
@@ -7,11 +7,19 @@ object Optimizer {
new MXKVStoreUpdater {
val states = new scala.collection.mutable.HashMap[Int, AnyRef]
override def update(index: Int, grad: NDArray, weight: NDArray): Unit = {
- val state = states.getOrElseUpdate(index, optimizer.createState(index, weight))
+ val state =
+ if (states.contains(index)) {
+ states.get(index).get
+ } else {
+ val newState = optimizer.createState(index, weight)
+ states.put(index, newState)
+ newState
+ }
optimizer.update(index, weight, grad, state)
}
override def dispose(): Unit = {
states.values.foreach(optimizer.disposeState)
+ states.clear()
}
}
}
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Random.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Random.scala
index e1279e095dfa..c66dd32cc6a8 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Random.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Random.scala
@@ -37,15 +37,15 @@ object Random {
/**
* Generate normal(Gaussian) distribution N(mean, stdvar^^2) with shape.
*
- * @param mean The mean of the normal distribution.
- * @param stdvar The standard deviation of normal distribution.
+ * @param loc The mean of the normal distribution.
+ * @param scale The standard deviation of normal distribution.
* @param shape Output shape of the NDArray generated.
* @param ctx Context of output NDArray, will use default context if not specified.
* @param out Output place holder
* @return The result NDArray with generated result.
*/
- def normal(mean: Float,
- stdvar: Float,
+ def normal(loc: Float,
+ scale: Float,
shape: Shape = null,
ctx: Context = null,
out: NDArray = null): NDArray = {
@@ -56,7 +56,7 @@ object Random {
require(shape != null, "shape is required when out is not specified")
outCopy = empty(shape, ctx)
}
- randomGaussian(mean, stdvar, outCopy)
+ randomGaussian(loc, scale, outCopy)
}
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/MXDataIter.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/MXDataIter.scala
index 41e9ef1cf9b4..929630065926 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/MXDataIter.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/MXDataIter.scala
@@ -29,9 +29,11 @@ class MXDataIter private[mxnet](private[mxnet] val handle: DataIterHandle,
iterNext()
val data = currentBatch.data(0)
val label = currentBatch.label(0)
- reset()
// properties
- (Map(dataName -> data.shape), Map(labelName -> label.shape), data.shape(0))
+ val res = (Map(dataName -> data.shape), Map(labelName -> label.shape), data.shape(0))
+ currentBatch.dispose()
+ reset()
+ res
} else {
(null, null, 0)
}
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/PrefetchingIter.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/PrefetchingIter.scala
index 5ae522658581..8aa821daf628 100644
--- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/PrefetchingIter.scala
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/PrefetchingIter.scala
@@ -2,62 +2,161 @@ package ml.dmlc.mxnet.io
import ml.dmlc.mxnet.{DataBatch, DataIter, NDArray, Shape}
import org.slf4j.LoggerFactory
+import java.util.concurrent.Semaphore
/**
- * TODO
* Base class for prefetching iterators. Takes one or more DataIters
* and combine them with prefetching.
*
+ * @author Depeng Liang
*
* @param iters list of DataIters
* @param dataNames
* @param labelNames
*/
class PrefetchingIter(val iters: IndexedSeq[DataIter],
- val dataNames: Map[String, String] = null,
- val labelNames: Map[String, String] = null) extends DataIter {
+ val dataNames: IndexedSeq[Map[String, String]] = null,
+ val labelNames: IndexedSeq[Map[String, String]] = null) extends DataIter {
private val logger = LoggerFactory.getLogger(classOf[PrefetchingIter])
+ require(iters.length > 0, "Iters length must be greater than 0")
+
+ private val _provideData: Map[String, Shape] = {
+ if (dataNames == null) {
+ iters.map(_.provideData).foldLeft(Map[String, Shape]()) { (acc, elem) =>
+ acc ++ elem
+ }
+ } else {
+ iters.zipWithIndex.map(tu => (tu._1.provideData, tu._2))
+ .map(m => m._1.map(t => (dataNames(m._2)(t._1), t._2)))
+ .foldLeft(Map[String, Shape]()) { (acc, elem) =>
+ acc ++ elem
+ }
+ }
+ }
+
+ private val _provideLabel: Map[String, Shape] = {
+ if (labelNames == null) {
+ iters.map(_.provideLabel).foldLeft(Map[String, Shape]()) { (acc, elem) =>
+ acc ++ elem
+ }
+ } else {
+ iters.zipWithIndex.map(tu => (tu._1.provideLabel, tu._2))
+ .map(m => m._1.map(t => (labelNames(m._2)(t._1), t._2)))
+ .foldLeft(Map[String, Shape]()) { (acc, elem) =>
+ acc ++ elem
+ }
+ }
+ }
+
+ private val _batchSize: Int = this._provideData.toList(0)._2(0)
+ private val dataReady: IndexedSeq[Semaphore] =
+ (0 until iters.length).map(i => new Semaphore(0))
+ private val dataTaken: IndexedSeq[Semaphore] =
+ (0 until iters.length).map(i => new Semaphore(1))
+
+ @volatile private var started: Boolean = true
private var currentBatch: DataBatch = null
- private var nextBatch: DataBatch = null
+ private var nextBatch: Array[DataBatch] = (0 until iters.length).map { i =>
+ new DataBatch(null, null, null, 0)
+ }.toArray
+
+ // thread entry
+ def prefetchFunc(i: Int): Runnable = new Runnable {
+ override def run(): Unit = {
+ while (started) {
+ dataTaken(i).acquire()
+ if (started) {
+ try {
+ nextBatch(i) = iters(i).next()
+ } catch {
+ case ex: NoSuchElementException => nextBatch(i) = null
+ }
+ }
+ dataReady(i).release()
+ }
+ }
+ }
+
+ private val prefetchThreads =
+ for (i <- 0 until iters.length) yield new Thread(prefetchFunc(i))
+ prefetchThreads.foreach(_.start())
+
+ override def next(): DataBatch = currentBatch
/**
* reset the iterator
*/
- override def reset(): Unit = ???
+ override def reset(): Unit = {
+ for (e <- dataReady) e.acquire()
+ for (i <- iters) i.reset()
+ for (e <- dataTaken) e.release()
+ }
+
+ override def batchSize: Int = this._batchSize
/**
* get data of current batch
* @return the data of current batch
*/
- override def getData(): IndexedSeq[NDArray] = ???
+ override def getData(): IndexedSeq[NDArray] = currentBatch.data
/**
* Get label of current batch
* @return the label of current batch
*/
- override def getLabel(): IndexedSeq[NDArray] = ???
+ override def getLabel(): IndexedSeq[NDArray] = currentBatch.label
/**
* the index of current batch
* @return
*/
- override def getIndex(): IndexedSeq[Long] = ???
+ override def getIndex(): IndexedSeq[Long] = currentBatch.index
// The name and shape of label provided by this iterator
- override def provideLabel: Map[String, Shape] = ???
+ override def provideLabel: Map[String, Shape] = this._provideLabel
/**
* get the number of padding examples
* in current batch
* @return number of padding examples in current batch
*/
- override def getPad(): Int = ???
+ override def getPad(): Int = this.currentBatch.pad
// The name and shape of data provided by this iterator
- override def provideData: Map[String, Shape] = ???
+ override def provideData: Map[String, Shape] = this._provideData
- override def hasNext: Boolean = ???
+ override def hasNext: Boolean = {
+ for (e <- dataReady) e.acquire()
+ if (nextBatch(0) == null) {
+ for (i <- nextBatch) {
+ assert(i == null, "Number of entry mismatches between iterators")
+ }
+ for (e <- dataReady) e.release()
+ false
+ } else {
+ for (batch <- nextBatch) {
+ assert(batch.pad == nextBatch(0).pad,
+ "Number of entry mismatches between iterators")
+ }
+ val datas = for (batch <- nextBatch) yield batch.data
+ val labels = for (batch <- nextBatch) yield batch.label
+ currentBatch = new DataBatch(datas.toIndexedSeq.flatten,
+ labels.toIndexedSeq.flatten,
+ nextBatch(0).index,
+ nextBatch(0).pad)
+ for (e <- dataTaken) e.release()
+ true
+ }
+ }
- override def batchSize: Int = ???
+ /**
+ * Stop all its internal prefetching threads.
+ * The object shall never be used after it is disposed.
+ */
+ def dispose(): Unit = {
+ started = false
+ for (e <- dataTaken) e.release()
+ for (t <- prefetchThreads) t.join()
+ }
}
diff --git a/scala-package/core/src/test/resources/log4j.properties b/scala-package/core/src/test/resources/log4j.properties
new file mode 100644
index 000000000000..7d7ca36b28a1
--- /dev/null
+++ b/scala-package/core/src/test/resources/log4j.properties
@@ -0,0 +1,7 @@
+# for development debugging
+log4j.rootLogger = debug, stdout
+
+log4j.appender.stdout = org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target = System.out
+log4j.appender.stdout.layout = org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c] [%p] - %m%n
diff --git a/scala-package/core/src/test/scala/ml/dmlc/mxnet/IOSuite.scala b/scala-package/core/src/test/scala/ml/dmlc/mxnet/IOSuite.scala
index 38d8adf930e1..5fefc0704ba4 100644
--- a/scala-package/core/src/test/scala/ml/dmlc/mxnet/IOSuite.scala
+++ b/scala-package/core/src/test/scala/ml/dmlc/mxnet/IOSuite.scala
@@ -1,6 +1,6 @@
package ml.dmlc.mxnet
-import ml.dmlc.mxnet.io.{NDArrayIter, ResizeIter}
+import ml.dmlc.mxnet.io.{NDArrayIter, ResizeIter, PrefetchingIter}
import org.scalatest.{BeforeAndAfterAll, FunSuite}
import scala.sys.process._
@@ -150,6 +150,69 @@ class IOSuite extends FunSuite with BeforeAndAfterAll {
assert(batchCount === nBatch)
}
+ test("test PrefetchIter") {
+ // get data
+ "./scripts/get_mnist_data.sh" !
+
+ val params = Map(
+ "image" -> "data/train-images-idx3-ubyte",
+ "label" -> "data/train-labels-idx1-ubyte",
+ "data_shape" -> "(784,)",
+ "batch_size" -> "100",
+ "shuffle" -> "1",
+ "flat" -> "1",
+ "silent" -> "0",
+ "seed" -> "10"
+ )
+
+ val mnistPack1 = IO.MNISTPack(params)
+ val mnistPack2 = IO.MNISTPack(params)
+
+ val nBatch = 600
+ var batchCount = 0
+
+ val mnistIter1 = mnistPack1.iterator
+ val mnistIter2 = mnistPack2.iterator
+
+ var prefetchIter = new PrefetchingIter(
+ IndexedSeq(mnistIter1, mnistIter2),
+ IndexedSeq(Map("data" -> "data1"), Map("data" -> "data2")),
+ IndexedSeq(Map("label" -> "label1"), Map("label" -> "label2"))
+ )
+
+ // test loop
+ while(prefetchIter.hasNext) {
+ prefetchIter.next()
+ batchCount += 1
+ }
+ assert(nBatch === batchCount)
+
+ // test provideData
+ val provideData = prefetchIter.provideData
+ val provideLabel = prefetchIter.provideLabel
+ assert(provideData("data1") === Shape(100, 784))
+ assert(provideData("data2") === Shape(100, 784))
+ assert(provideLabel("label1") === Shape(100))
+ assert(provideLabel("label2") === Shape(100))
+
+ // test reset
+ prefetchIter.reset()
+ prefetchIter.next()
+ val label0 = prefetchIter.getLabel().head.toArray
+ val data0 = prefetchIter.getData().head.toArray
+ prefetchIter.next()
+ prefetchIter.next()
+ prefetchIter.next()
+ prefetchIter.reset()
+ prefetchIter.next()
+ val label1 = prefetchIter.getLabel().head.toArray
+ val data1 = prefetchIter.getData().head.toArray
+ assert(label0 === label1)
+ assert(data0 === data1)
+
+ prefetchIter.dispose()
+ }
+
test("test NDArrayIter") {
val shape0 = Shape(Array(1000, 2, 2))
val data = IndexedSeq(NDArray.ones(shape0), NDArray.zeros(shape0))
diff --git a/scala-package/core/src/test/scala/ml/dmlc/mxnet/train/ConvSuite.scala b/scala-package/core/src/test/scala/ml/dmlc/mxnet/train/ConvSuite.scala
index 0f356bfb5fa4..57c065f2e86b 100644
--- a/scala-package/core/src/test/scala/ml/dmlc/mxnet/train/ConvSuite.scala
+++ b/scala-package/core/src/test/scala/ml/dmlc/mxnet/train/ConvSuite.scala
@@ -91,6 +91,6 @@ class ConvSuite extends FunSuite with BeforeAndAfterAll {
}
val acc = numCorrect.toFloat / numInst
logger.info(s"Final accuracy = $acc")
- assert(acc > 0.96)
+ assert(acc > 0.95)
}
}
diff --git a/scala-package/examples/scripts/rnn/run_test_charrnn.sh b/scala-package/examples/scripts/rnn/run_test_charrnn.sh
new file mode 100644
index 000000000000..3abe98917a8f
--- /dev/null
+++ b/scala-package/examples/scripts/rnn/run_test_charrnn.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
+CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
+
+# you can get the training data file using the following command
+# wget http://data.dmlc.ml/mxnet/data/lab_data.zip
+# unzip -o lab_data.zip
+# for example ./datas/obama.txt
+DATA_PATH=$1
+# for example ./models/obama
+MODEL_PREFIX=$2
+# feel free to change the starter sentence
+STARTER_SENTENCE="The joke"
+
+java -Xmx4G -cp $CLASS_PATH \
+ ml.dmlc.mxnet.examples.rnn.TestCharRnn \
+ --data-path $DATA_PATH \
+ --model-prefix $MODEL_PREFIX \
+ --starter-sentence "$STARTER_SENTENCE"
diff --git a/scala-package/examples/scripts/rnn/run_train_charrnn.sh b/scala-package/examples/scripts/rnn/run_train_charrnn.sh
new file mode 100644
index 000000000000..04379d33401a
--- /dev/null
+++ b/scala-package/examples/scripts/rnn/run_train_charrnn.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
+CLASS_PATH=$MXNET_ROOT/scala-package/assembly/linux-x86_64-gpu/target/*:$MXNET_ROOT/scala-package/examples/target/*:$MXNET_ROOT/scala-package/examples/target/classes/lib/*
+
+# which gpu card to use, -1 means cpu
+GPU=$1
+# you can get the training data file using the following command
+# wget http://data.dmlc.ml/mxnet/data/lab_data.zip
+# unzip -o lab_data.zip
+# for example ./datas/obama.txt
+DATA_PATH=$2
+# for example ./models
+SAVE_MODEL_PATH=$3
+
+java -Xmx4G -cp $CLASS_PATH \
+ ml.dmlc.mxnet.examples.rnn.TrainCharRnn \
+ --data-path $DATA_PATH \
+ --save-model-path $SAVE_MODEL_PATH \
+ --gpu $GPU \
diff --git a/scala-package/examples/src/main/resources/log4j.properties b/scala-package/examples/src/main/resources/log4j.properties
index 7d7ca36b28a1..cb92f4c5250a 100644
--- a/scala-package/examples/src/main/resources/log4j.properties
+++ b/scala-package/examples/src/main/resources/log4j.properties
@@ -1,5 +1,5 @@
# for development debugging
-log4j.rootLogger = debug, stdout
+log4j.rootLogger = info, stdout
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target = System.out
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/ModelTrain.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/ModelTrain.scala
index d2605a152b4a..97deaf3123b2 100644
--- a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/ModelTrain.scala
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/ModelTrain.scala
@@ -15,8 +15,7 @@ object ModelTrain {
lr: Float = 0.1f, lrFactor: Float = 1f, lrFactorEpoch: Float = 1f,
clipGradient: Float = 0f, monitorSize: Int = -1): Unit = {
// kvstore
- // TODO: if local mode and no gpu is used, set kv = null
- val kv = KVStore.create(kvStore)
+ var kv = KVStore.create(kvStore)
// load model
val modelPrefixWithRank =
@@ -62,6 +61,12 @@ object ModelTrain {
lrScheduler = lrScheduler, clipGradient = clipGradient,
momentum = 0.9f, wd = 0.00001f)
+ // disable kvstore for single device
+ if (kv.`type`.contains("local") && (devs.length == 1 || devs(0).deviceType != "gpu")) {
+ kv.dispose()
+ kv = null
+ }
+
val model = new FeedForward(ctx = devs,
symbol = network,
numEpoch = numEpochs,
@@ -80,7 +85,9 @@ object ModelTrain {
kvStore = kv,
batchEndCallback = new Speedometer(batchSize, 50),
epochEndCallback = checkpoint)
- kv.dispose()
+ if (kv != null) {
+ kv.dispose()
+ }
}
// scalastyle:on parameterNum
}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/TrainMnist.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/TrainMnist.scala
index f9acac462f10..44792cf4fc00 100644
--- a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/TrainMnist.scala
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/imclassification/TrainMnist.scala
@@ -102,9 +102,9 @@ object TrainMnist {
envs.put("DMLC_NUM_WORKER", inst.numWorker.toString)
require(inst.numServer > 0, "Num of servers must > 0")
envs.put("DMLC_NUM_SERVER", inst.numServer.toString)
+ logger.info("Init PS environments")
+ KVStoreServer.init(envs.toMap)
}
- logger.info("Init PS environments")
- KVStoreServer.init(envs.toMap)
if (inst.role != "worker") {
logger.info("Start KVStoreServer for scheduler & servers")
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/ButketIo.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/ButketIo.scala
new file mode 100644
index 000000000000..9a11f6fa2950
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/ButketIo.scala
@@ -0,0 +1,205 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import ml.dmlc.mxnet.{DataBatch, DataIter, NDArray, Shape}
+import org.slf4j.LoggerFactory
+import scala.io.Source
+import scala.util.Random
+
+/**
+ * @author Depeng Liang
+ */
+object ButketIo {
+
+ type Text2Id = (String, Map[String, Int]) => Array[Int]
+ type ReadContent = String => String
+
+ def defaultReadContent(path: String): String = {
+ val content = Source.fromFile(path).mkString
+ .replaceAll("\n", " ")
+ .replaceAll(". ", " ")
+ content
+ }
+
+ def defaultText2Id(sentence: String, theVocab: Map[String, Int]): Array[Int] = {
+ val words = {
+ val tmp = sentence.split(" ").filter(_.length() > 0)
+ for (w <- tmp) yield theVocab(w)
+ }
+ words.toArray
+ }
+
+ def defaultGenBuckets(sentences: Array[String], batchSize: Int,
+ theVocab: Map[String, Int]): List[Int] = {
+ val lenDict = scala.collection.mutable.Map[Int, Int]()
+ var maxLen = -1
+ for (sentence <- sentences) {
+ val wordsLen = defaultText2Id(sentence, theVocab).length
+ if (wordsLen > 0) {
+ if (wordsLen > maxLen) {
+ maxLen = wordsLen
+ }
+ if (lenDict.contains(wordsLen)) {
+ lenDict(wordsLen) = lenDict(wordsLen) + 1
+ } else {
+ lenDict += wordsLen -> 1
+ }
+ }
+ }
+
+ var tl = 0
+ var buckets = List[Int]()
+ lenDict.foreach {
+ case (l, n) =>
+ if (n + tl >= batchSize) {
+ buckets = buckets :+ l
+ tl = 0
+ } else tl += n
+ }
+ if (tl > 0) buckets = buckets :+ maxLen
+ buckets
+ }
+
+ class BucketSentenceIter(
+ path: String, vocab: Map[String, Int], var buckets: List[Int],
+ _batchSize: Int, initStates: IndexedSeq[(String, (Int, Int))],
+ seperateChar: String = " ", text2Id: Text2Id = defaultText2Id,
+ readContent: ReadContent = defaultReadContent) extends DataIter {
+
+ private val logger = LoggerFactory.getLogger(classOf[BucketSentenceIter])
+
+ private val content = readContent(path)
+ private val sentences = content.split(seperateChar)
+
+ if (buckets.length == 0) {
+ buckets = defaultGenBuckets(sentences, batchSize, vocab)
+ }
+ buckets = buckets.sorted
+ // pre-allocate with the largest bucket for better memory sharing
+ private val defaultBucketKey = (buckets(0) /: buckets.drop(1)) { (max, elem) =>
+ if (max < elem) elem else max
+ }
+    // we just ignore the sentence if it is longer than the maximum
+ // bucket size here
+ private val data = buckets.indices.map(x => Array[Array[Float]]()).toArray
+ for (sentence <- sentences) {
+ val ids = text2Id(sentence, vocab)
+ if (ids.length > 0) {
+ buckets.indices.foreach { idx =>
+ if (buckets(idx) >= ids.length) {
+ data(idx) = data(idx) :+
+ (ids.map(_.toFloat) ++ Array.fill[Float](buckets(idx) - ids.length)(0f))
+ }
+ }
+ }
+ }
+
+ // Get the size of each bucket, so that we could sample
+ // uniformly from the bucket
+ private val bucketSizes = data.map(_.length)
+ logger.info("Summary of dataset ==================")
+ buckets.zip(bucketSizes).foreach {
+ case (bkt, size) => logger.info(s"bucket of len $bkt : $size samples")
+ }
+
+ // make a random data iteration plan
+ // truncate each bucket into multiple of batch-size
+ private var bucketNBatches = Array[Int]()
+ for (i <- data.indices) {
+ bucketNBatches = bucketNBatches :+ (data(i).length / _batchSize)
+ data(i) = data(i).take(bucketNBatches(i) * _batchSize)
+ }
+
+ private val bucketPlan = {
+ val plan = bucketNBatches.zipWithIndex.map(x => Array.fill[Int](x._1)(x._2)).flatten
+ Random.shuffle(plan.toList)
+ }
+
+ private val bucketIdxAll = data.map(_.length).toList
+ .map(l => Random.shuffle((0 until l).toList))
+ private val bucketCurrIdx = data.map(x => 0)
+
+ private var dataBuffer = Array[NDArray]()
+ private var labelBuffer = Array[NDArray]()
+ for (iBucket <- data.indices) {
+ dataBuffer = dataBuffer :+ NDArray.zeros(_batchSize, buckets(iBucket))
+ labelBuffer = labelBuffer :+ NDArray.zeros(_batchSize, buckets(iBucket))
+ }
+
+ private val _provideData = {
+ val tmp = Map("data" -> Shape(_batchSize, defaultBucketKey))
+ tmp ++ initStates.map(x => x._1 -> Shape(x._2._1, x._2._2))
+ }
+ private val _provideLabel = Map("softmax_label" -> Shape(_batchSize, defaultBucketKey))
+
+ private var iBucket = 0
+
+ override def next(): DataBatch = {
+ val bucketIdx = bucketPlan(iBucket)
+ val dataBuf = dataBuffer(bucketIdx)
+ val iIdx = bucketCurrIdx(bucketIdx)
+ val idx = bucketIdxAll(bucketIdx).drop(iIdx).take(_batchSize)
+ bucketCurrIdx(bucketIdx) = bucketCurrIdx(bucketIdx) + _batchSize
+
+ val datas = idx.map(i => data(bucketIdx)(i)).toArray
+ for (sentence <- datas) {
+ assert(sentence.length == buckets(bucketIdx))
+ }
+ dataBuf.set(datas.flatten)
+
+ val labelBuf = labelBuffer(bucketIdx)
+ val labels = idx.map(i => data(bucketIdx)(i).drop(1) :+ 0f).toArray
+ labelBuf.set(labels.flatten)
+
+ iBucket += 1
+ new DataBatch(IndexedSeq(dataBuf),
+ IndexedSeq(labelBuf),
+ getIndex(),
+ getPad())
+ }
+
+ /**
+ * reset the iterator
+ */
+ override def reset(): Unit = {
+ iBucket = 0
+ bucketCurrIdx.indices.map(i => bucketCurrIdx(i) = 0)
+ }
+
+ override def batchSize: Int = _batchSize
+
+ /**
+ * get data of current batch
+ * @return the data of current batch
+ */
+ override def getData(): IndexedSeq[NDArray] = IndexedSeq(dataBuffer(bucketPlan(iBucket)))
+
+ /**
+ * Get label of current batch
+ * @return the label of current batch
+ */
+ override def getLabel(): IndexedSeq[NDArray] = IndexedSeq(labelBuffer(bucketPlan(iBucket)))
+
+ /**
+ * the index of current batch
+ * @return
+ */
+ override def getIndex(): IndexedSeq[Long] = IndexedSeq[Long]()
+
+ // The name and shape of label provided by this iterator
+ override def provideLabel: Map[String, Shape] = this._provideLabel
+
+ /**
+ * get the number of padding examples
+ * in current batch
+ * @return number of padding examples in current batch
+ */
+ override def getPad(): Int = 0
+
+ // The name and shape of data provided by this iterator
+ override def provideData: Map[String, Shape] = this._provideData
+
+ override def hasNext: Boolean = {
+ if (iBucket < bucketPlan.length) true else false
+ }
+ }
+}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Lstm.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Lstm.scala
new file mode 100644
index 000000000000..0e2e5f7de66b
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Lstm.scala
@@ -0,0 +1,148 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import ml.dmlc.mxnet.Symbol
+import ml.dmlc.mxnet.Executor
+
+/**
+ * @author Depeng Liang
+ */
+object Lstm {
+
+ final case class LSTMState(c: Symbol, h: Symbol)
+ final case class LSTMParam(i2hWeight: Symbol, i2hBias: Symbol,
+ h2hWeight: Symbol, h2hBias: Symbol)
+
+ // LSTM Cell symbol
+ def lstm(numHidden: Int, inData: Symbol, prevState: LSTMState,
+ param: LSTMParam, seqIdx: Int, layerIdx: Int, dropout: Float = 0f): LSTMState = {
+ val inDataa = {
+ if (dropout > 0f) Symbol.Dropout()(Map("data" -> inData, "p" -> dropout))
+ else inData
+ }
+ val i2h = Symbol.FullyConnected(s"t${seqIdx}_l${layerIdx}_i2h")(Map("data" -> inDataa,
+ "weight" -> param.i2hWeight,
+ "bias" -> param.i2hBias,
+ "num_hidden" -> numHidden * 4))
+ val h2h = Symbol.FullyConnected(s"t${seqIdx}_l${layerIdx}_h2h")(Map("data" -> prevState.h,
+ "weight" -> param.h2hWeight,
+ "bias" -> param.h2hBias,
+ "num_hidden" -> numHidden * 4))
+ val gates = i2h + h2h
+ val sliceGates = Symbol.SliceChannel(s"t${seqIdx}_l${layerIdx}_slice")(Array(gates),
+ Map("num_outputs" -> 4))
+ val ingate = Symbol.Activation()(Map("data" -> sliceGates.get(0), "act_type" -> "sigmoid"))
+ val inTransform = Symbol.Activation()(Map("data" -> sliceGates.get(1), "act_type" -> "tanh"))
+ val forgetGate = Symbol.Activation()(Map("data" -> sliceGates.get(2), "act_type" -> "sigmoid"))
+ val outGate = Symbol.Activation()(Map("data" -> sliceGates.get(3), "act_type" -> "sigmoid"))
+ val nextC = (forgetGate * prevState.c) + (ingate * inTransform)
+ val nextH = outGate * Symbol.Activation()(Map("data" -> nextC, "act_type" -> "tanh"))
+ LSTMState(c = nextC, h = nextH)
+ }
+
+ // we define a new unrolling function here because the original
+ // one in lstm.py concats all the labels at the last layer together,
+ // making the mini-batch size of the label different from the data.
+ // I think the existing data-parallelization code need some modification
+ // to allow this situation to work properly
+ def lstmUnroll(numLstmLayer: Int, seqLen: Int, inputSize: Int, numHidden: Int,
+ numEmbed: Int, numLabel: Int, dropout: Float = 0f): Symbol = {
+ val embedWeight = Symbol.Variable("embed_weight")
+ val clsWeight = Symbol.Variable("cls_weight")
+ val clsBias = Symbol.Variable("cls_bias")
+
+ var paramCells = Array[LSTMParam]()
+ var lastStates = Array[LSTMState]()
+ for (i <- 0 until numLstmLayer) {
+ paramCells = paramCells :+ LSTMParam(i2hWeight = Symbol.Variable(s"l${i}_i2h_weight"),
+ i2hBias = Symbol.Variable(s"l${i}_i2h_bias"),
+ h2hWeight = Symbol.Variable(s"l${i}_h2h_weight"),
+ h2hBias = Symbol.Variable(s"l${i}_h2h_bias"))
+ lastStates = lastStates :+ LSTMState(c = Symbol.Variable(s"l${i}_init_c"),
+ h = Symbol.Variable(s"l${i}_init_h"))
+ }
+ assert(lastStates.length == numLstmLayer)
+
+    // embedding layer
+ val data = Symbol.Variable("data")
+ var label = Symbol.Variable("softmax_label")
+ val embed = Symbol.Embedding("embed")(Map("data" -> data, "input_dim" -> inputSize,
+ "weight" -> embedWeight, "output_dim" -> numEmbed))
+ val wordvec = Symbol.SliceChannel()(Array(embed),
+ Map("num_outputs" -> seqLen, "squeeze_axis" -> true))
+
+ var hiddenAll = Array[Symbol]()
+ var dpRatio = 0f
+ var hidden: Symbol = null
+ for (seqIdx <- 0 until seqLen) {
+ hidden = wordvec.get(seqIdx)
+ // stack LSTM
+ for (i <- 0 until numLstmLayer) {
+ if (i == 0) dpRatio = 0f else dpRatio = dropout
+ val nextState = lstm(numHidden, inData = hidden,
+ prevState = lastStates(i),
+ param = paramCells(i),
+ seqIdx = seqIdx, layerIdx = i, dropout = dpRatio)
+ hidden = nextState.h
+ lastStates(i) = nextState
+ }
+ // decoder
+ if (dropout > 0f) hidden = Symbol.Dropout()(Map("data" -> hidden, "p" -> dropout))
+ hiddenAll = hiddenAll :+ hidden
+ }
+ val hiddenConcat = Symbol.Concat()(hiddenAll, Map("dim" -> 0))
+ val pred = Symbol.FullyConnected("pred")(Map("data" -> hiddenConcat, "num_hidden" -> numLabel,
+ "weight" -> clsWeight, "bias" -> clsBias))
+ label = Symbol.transpose(label)
+ label = Symbol.Reshape()(Map("data" -> label, "target_shape" -> "(0,)"))
+ val sm = Symbol.SoftmaxOutput("softmax")(Map("data" -> pred, "label" -> label))
+ sm
+ }
+
+ def lstmInferenceSymbol(numLstmLayer: Int, inputSize: Int, numHidden: Int,
+ numEmbed: Int, numLabel: Int, dropout: Float = 0f): Symbol = {
+ val seqIdx = 0
+ val embedWeight = Symbol.Variable("embed_weight")
+ val clsWeight = Symbol.Variable("cls_weight")
+ val clsBias = Symbol.Variable("cls_bias")
+
+ var paramCells = Array[LSTMParam]()
+ var lastStates = Array[LSTMState]()
+ for (i <- 0 until numLstmLayer) {
+ paramCells = paramCells :+ LSTMParam(i2hWeight = Symbol.Variable(s"l${i}_i2h_weight"),
+ i2hBias = Symbol.Variable(s"l${i}_i2h_bias"),
+ h2hWeight = Symbol.Variable(s"l${i}_h2h_weight"),
+ h2hBias = Symbol.Variable(s"l${i}_h2h_bias"))
+ lastStates = lastStates :+ LSTMState(c = Symbol.Variable(s"l${i}_init_c"),
+ h = Symbol.Variable(s"l${i}_init_h"))
+ }
+ assert(lastStates.length == numLstmLayer)
+
+ val data = Symbol.Variable("data")
+
+ var hidden = Symbol.Embedding("embed")(Map("data" -> data, "input_dim" -> inputSize,
+ "weight" -> embedWeight, "output_dim" -> numEmbed))
+
+ var dpRatio = 0f
+ // stack LSTM
+ for (i <- 0 until numLstmLayer) {
+ if (i == 0) dpRatio = 0f else dpRatio = dropout
+ val nextState = lstm(numHidden, inData = hidden,
+ prevState = lastStates(i),
+ param = paramCells(i),
+ seqIdx = seqIdx, layerIdx = i, dropout = dpRatio)
+ hidden = nextState.h
+ lastStates(i) = nextState
+ }
+ // decoder
+ if (dropout > 0f) hidden = Symbol.Dropout()(Map("data" -> hidden, "p" -> dropout))
+ val fc = Symbol.FullyConnected("pred")(Map("data" -> hidden, "num_hidden" -> numLabel,
+ "weight" -> clsWeight, "bias" -> clsBias))
+ val sm = Symbol.SoftmaxOutput("softmax")(Map("data" -> fc))
+ var output = Array(sm)
+ for (state <- lastStates) {
+ output = output :+ state.c
+ output = output :+ state.h
+ }
+ Symbol.Group(output: _*)
+ }
+}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/RnnModel.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/RnnModel.scala
new file mode 100644
index 000000000000..b91835f7d076
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/RnnModel.scala
@@ -0,0 +1,55 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import ml.dmlc.mxnet.Context
+import ml.dmlc.mxnet.NDArray
+import ml.dmlc.mxnet.Shape
+import ml.dmlc.mxnet.Symbol
+
+object RnnModel {
+ class LSTMInferenceModel(numLstmLayer: Int, inputSize: Int, numHidden: Int,
+ numEmbed: Int, numLabel: Int, argParams: Map[String, NDArray],
+ ctx: Context = Context.cpu(), dropout: Float = 0f) {
+ private val sym = Lstm.lstmInferenceSymbol(numLstmLayer,
+ inputSize,
+ numHidden,
+ numEmbed,
+ numLabel,
+ dropout)
+ private val batchSize = 1
+ private val initC = (for (l <- 0 until numLstmLayer)
+ yield (s"l${l}_init_c" -> Shape(batchSize, numHidden))).toMap
+ private val initH = (for (l <- 0 until numLstmLayer)
+ yield (s"l${l}_init_h" -> Shape(batchSize, numHidden))).toMap
+ private val dataShape = Map("data" -> Shape(batchSize))
+ private val inputShape = initC ++ initH ++ dataShape
+ private val executor = sym.simpleBind(ctx = ctx, shapeDict = inputShape)
+
+ for (key <- this.executor.argDict.keys) {
+ if (!inputShape.contains(key) && argParams.contains(key) && key != "softmax_label") {
+ argParams(key).copyTo(this.executor.argDict(key))
+ }
+ }
+
+ private var stateName = (Array[String]() /: (0 until numLstmLayer)) { (acc, i) =>
+ acc :+ s"l${i}_init_c" :+ s"l${i}_init_h"
+ }
+
+ private val statesDict = stateName.zip(this.executor.outputs.drop(1)).toMap
+ private val inputArr = NDArray.zeros(dataShape("data"))
+
+ def forward(inputData: NDArray, newSeq: Boolean = false): Array[Float] = {
+ if (newSeq == true) {
+ for (key <- this.statesDict.keys) {
+ this.executor.argDict(key).set(0f)
+ }
+ }
+ inputData.copyTo(this.executor.argDict("data"))
+ this.executor.forward()
+ for (key <- this.statesDict.keys) {
+ this.statesDict(key).copyTo(this.executor.argDict(key))
+ }
+ val prob = this.executor.outputs(0).toArray
+ prob
+ }
+ }
+}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TestCharRnn.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TestCharRnn.scala
new file mode 100644
index 000000000000..a3351ff12557
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TestCharRnn.scala
@@ -0,0 +1,88 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import ml.dmlc.mxnet._
+import org.kohsuke.args4j.{CmdLineParser, Option}
+import org.slf4j.LoggerFactory
+import scala.collection.JavaConverters._
+
+/**
+ * Follows the demo, to test the char rnn:
+ * https://github.com/dmlc/mxnet/blob/master/example/rnn/char-rnn.ipynb
+ * @author Depeng Liang
+ */
+object TestCharRnn {
+
+ private val logger = LoggerFactory.getLogger(classOf[TrainCharRnn])
+
+ def main(args: Array[String]): Unit = {
+ val stcr = new TestCharRnn
+ val parser: CmdLineParser = new CmdLineParser(stcr)
+ try {
+ parser.parseArgument(args.toList.asJava)
+ assert(stcr.dataPath != null && stcr.modelPrefix != null && stcr.starterSentence != null)
+
+ // The batch size for training
+ val batchSize = 32
+ // We can support various length input
+ // For this problem, we cut each input sentence to length of 129
+ // So we only need fix length bucket
+ val buckets = List(129)
+ // hidden unit in LSTM cell
+ val numHidden = 512
+ // embedding dimension, which is, map a char to a 256 dim vector
+ val numEmbed = 256
+ // number of lstm layer
+ val numLstmLayer = 3
+
+      // build char vocabulary from input
+ val vocab = Utils.buildVocab(stcr.dataPath)
+
+ // load from check-point
+ val (_, argParams, _) = Model.loadCheckpoint(stcr.modelPrefix, 75)
+
+ // build an inference model
+ val model = new RnnModel.LSTMInferenceModel(numLstmLayer, vocab.size + 1,
+ numHidden = numHidden, numEmbed = numEmbed,
+ numLabel = vocab.size + 1, argParams = argParams, dropout = 0.2f)
+
+ // generate a sequence of 1200 chars
+ val seqLength = 1200
+ val inputNdarray = NDArray.zeros(1)
+ val revertVocab = Utils.makeRevertVocab(vocab)
+
+ // Feel free to change the starter sentence
+ var output = stcr.starterSentence
+ val randomSample = true
+ var newSentence = true
+ val ignoreLength = output.length()
+
+ for (i <- 0 until seqLength) {
+ if (i <= ignoreLength - 1) Utils.makeInput(output(i), vocab, inputNdarray)
+ else Utils.makeInput(output.takeRight(1)(0), vocab, inputNdarray)
+ val prob = model.forward(inputNdarray, newSentence)
+ newSentence = false
+ val nextChar = Utils.makeOutput(prob, revertVocab, randomSample)
+ if (nextChar == "") newSentence = true
+ if (i >= ignoreLength) output = output ++ nextChar
+ }
+
+      // Let's see what we can learn from the chars in Obama's speech.
+ logger.info(output)
+ } catch {
+ case ex: Exception => {
+ logger.error(ex.getMessage, ex)
+ parser.printUsage(System.err)
+ sys.exit(1)
+ }
+ }
+ }
+}
+
+class TestCharRnn {
+ @Option(name = "--data-path", usage = "the input train data file")
+ private val dataPath: String = null
+ @Option(name = "--model-prefix", usage = "the model prefix")
+ private val modelPrefix: String = null
+ @Option(name = "--starter-sentence", usage = "the starter sentence")
+ private val starterSentence: String = null
+}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TrainCharRnn.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TrainCharRnn.scala
new file mode 100644
index 000000000000..ef15636f836b
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/TrainCharRnn.scala
@@ -0,0 +1,160 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import ml.dmlc.mxnet._
+import org.kohsuke.args4j.{CmdLineParser, Option}
+import org.slf4j.LoggerFactory
+import scala.collection.JavaConverters._
+import ml.dmlc.mxnet.optimizer.Adam
+
+/**
+ * Follows the demo, to train the char rnn:
+ * https://github.com/dmlc/mxnet/blob/master/example/rnn/char-rnn.ipynb
+ * @author Depeng Liang
+ */
+object TrainCharRnn {
+
+ private val logger = LoggerFactory.getLogger(classOf[TrainCharRnn])
+
+ def main(args: Array[String]): Unit = {
+ val incr = new TrainCharRnn
+ val parser: CmdLineParser = new CmdLineParser(incr)
+ try {
+ parser.parseArgument(args.toList.asJava)
+ assert(incr.dataPath != null && incr.saveModelPath != null)
+
+ // The batch size for training
+ val batchSize = 32
+ // We can support various length input
+ // For this problem, we cut each input sentence to length of 129
+ // So we only need fix length bucket
+ val buckets = List(129)
+ // hidden unit in LSTM cell
+ val numHidden = 512
+ // embedding dimension, which is, map a char to a 256 dim vector
+ val numEmbed = 256
+ // number of lstm layer
+ val numLstmLayer = 3
+      // we will show a quick demo in 2 epochs
+      // and we will see the result by training 75 epochs
+ val numEpoch = 75
+ // learning rate
+ val learningRate = 0.001f
+ // we will use pure sgd without momentum
+ val momentum = 0.0f
+
+ val ctx = if (incr.gpu == -1) Context.cpu() else Context.gpu(incr.gpu)
+ val vocab = Utils.buildVocab(incr.dataPath)
+
+ // generate symbol for a length
+ def symGen(seqLen: Int): Symbol = {
+ Lstm.lstmUnroll(numLstmLayer, seqLen, vocab.size + 1,
+ numHidden = numHidden, numEmbed = numEmbed,
+ numLabel = vocab.size + 1, dropout = 0.2f)
+ }
+
+      // initialize states for LSTM
+ val initC = for (l <- 0 until numLstmLayer) yield (s"l${l}_init_c", (batchSize, numHidden))
+ val initH = for (l <- 0 until numLstmLayer) yield (s"l${l}_init_h", (batchSize, numHidden))
+ val initStates = initC ++ initH
+
+ val dataTrain = new ButketIo.BucketSentenceIter(incr.dataPath, vocab, buckets,
+ batchSize, initStates, seperateChar = "\n",
+ text2Id = Utils.text2Id, readContent = Utils.readContent)
+
+ // the network symbol
+ val symbol = symGen(buckets(0))
+
+ val datasAndLabels = dataTrain.provideData ++ dataTrain.provideLabel
+ val (argShapes, outputShapes, auxShapes) = symbol.inferShape(datasAndLabels)
+
+ val initializer = new Xavier(factorType = "in", magnitude = 2.34f)
+
+ val argNames = symbol.listArguments()
+ val argDict = argNames.zip(argShapes.map(NDArray.zeros(_, ctx))).toMap
+ val auxNames = symbol.listAuxiliaryStates()
+ val auxDict = auxNames.zip(auxShapes.map(NDArray.zeros(_, ctx))).toMap
+
+ val gradDict = argNames.zip(argShapes).filter { case (name, shape) =>
+ !datasAndLabels.contains(name)
+ }.map(x => x._1 -> NDArray.empty(x._2, ctx) ).toMap
+
+ argDict.foreach { case (name, ndArray) =>
+ if (!datasAndLabels.contains(name)) {
+ initializer.initWeight(name, ndArray)
+ }
+ }
+
+ val data = argDict("data")
+ val label = argDict("softmax_label")
+
+ val executor = symbol.bind(ctx, argDict, gradDict)
+
+ val opt = new Adam(learningRate = learningRate, wd = 0.0001f)
+
+ val paramsGrads = gradDict.toList.zipWithIndex.map { case ((name, grad), idx) =>
+ (idx, name, grad, opt.createState(idx, argDict(name)))
+ }
+
+ val evalMetric = new CustomMetric(Utils.perplexity, "perplexity")
+ val batchEndCallback = new Callback.Speedometer(batchSize, 50)
+ val epochEndCallback = Utils.doCheckpoint(s"${incr.saveModelPath}/obama")
+
+ for (epoch <- 0 until numEpoch) {
+ // Training phase
+ val tic = System.currentTimeMillis
+ evalMetric.reset()
+ var nBatch = 0
+ var epochDone = false
+ // Iterate over training data.
+ dataTrain.reset()
+ while (!epochDone) {
+ var doReset = true
+ while (doReset && dataTrain.hasNext) {
+ val dataBatch = dataTrain.next()
+
+ data.set(dataBatch.data(0))
+ label.set(dataBatch.label(0))
+ executor.forward(isTrain = true)
+ executor.backward()
+ paramsGrads.foreach { case (idx, name, grad, optimState) =>
+ opt.update(idx, argDict(name), grad, optimState)
+ }
+
+ // evaluate at end, so out_cpu_array can lazy copy
+ evalMetric.update(dataBatch.label, executor.outputs)
+
+ nBatch += 1
+ batchEndCallback.invoke(epoch, nBatch, evalMetric)
+ }
+ if (doReset) {
+ dataTrain.reset()
+ }
+ // this epoch is done
+ epochDone = true
+ }
+ val (name, value) = evalMetric.get
+ logger.info(s"Epoch[$epoch] Train-$name=$value")
+ val toc = System.currentTimeMillis
+ logger.info(s"Epoch[$epoch] Time cost=${toc - tic}")
+
+ epochEndCallback.invoke(epoch, symbol, argDict, auxDict)
+ }
+ executor.dispose()
+ } catch {
+ case ex: Exception => {
+ logger.error(ex.getMessage, ex)
+ parser.printUsage(System.err)
+ sys.exit(1)
+ }
+ }
+ }
+}
+
+class TrainCharRnn {
+ @Option(name = "--data-path", usage = "the input train data file")
+ private val dataPath: String = null
+ @Option(name = "--save-model-path", usage = "the model saving path")
+ private val saveModelPath: String = null
+ @Option(name = "--gpu", usage = "which gpu card to use, default is -1, means using cpu")
+ private val gpu: Int = -1
+}
diff --git a/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Utils.scala b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Utils.scala
new file mode 100644
index 000000000000..9fba8c1fd3e4
--- /dev/null
+++ b/scala-package/examples/src/main/scala/ml/dmlc/mxnet/examples/rnn/Utils.scala
@@ -0,0 +1,137 @@
+package ml.dmlc.mxnet.examples.rnn
+
+import scala.io.Source
+import ml.dmlc.mxnet.EvalMetric
+import ml.dmlc.mxnet.NDArray
+import ml.dmlc.mxnet.EpochEndCallback
+import ml.dmlc.mxnet.Model
+import ml.dmlc.mxnet.Symbol
+import scala.util.Random
+
+/**
+ * @author Depeng Liang
+ */
+object Utils {
+
+ def readContent(path: String): String = Source.fromFile(path).mkString
+
+ // Build a vocabulary of what char we have in the content
+ def buildVocab(path: String): Map[String, Int] = {
+ val content = readContent(path).split("\n")
+ var idx = 1 // 0 is left for zero padding
+ var theVocab = Map[String, Int]()
+ for (line <- content) {
+ for (char <- line) {
+ val key = s"$char"
+ if (!theVocab.contains(key)) {
+ theVocab = theVocab + (key -> idx)
+ idx += 1
+ }
+ }
+ }
+ theVocab
+ }
+
+ // We will assign each char with a special numerical id
+ def text2Id(sentence: String, theVocab: Map[String, Int]): Array[Int] = {
+ val words = for (char <- sentence) yield theVocab(s"$char")
+ words.toArray
+ }
+
+ // Evaluation
+ def perplexity(label: NDArray, pred: NDArray): Float = {
+ val shape = label.shape
+ val size = shape(0) * shape(1)
+ val labelT = {
+ val tmp = label.toArray.grouped(shape(1)).toArray
+ val result = Array.fill[Float](size)(0f)
+ var idx = 0
+ for (i <- 0 until shape(1)) {
+ for (j <- 0 until shape(0)) {
+ result(idx) = tmp(j)(i)
+ idx += 1
+ }
+ }
+ result
+ }
+ var loss = 0f
+ val predArray = pred.toArray.grouped(pred.shape(1)).toArray
+ for (i <- 0 until pred.shape(0)) {
+ loss += -Math.log(Math.max(1e-10, predArray(i)(labelT(i).toInt)).toFloat).toFloat
+ }
+ loss / size
+ }
+
+ def doCheckpoint(prefix: String): EpochEndCallback = new EpochEndCallback {
+ override def invoke(epoch: Int, symbol: Symbol,
+ argParams: Map[String, NDArray],
+ auxStates: Map[String, NDArray]): Unit = {
+ Model.saveCheckpoint(prefix, epoch + 1, symbol, argParams, auxStates)
+ }
+ }
+
+  // helper structure for prediction
+ def makeRevertVocab(vocab: Map[String, Int]): Map[Int, String] = {
+ var dic = Map[Int, String]()
+ vocab.foreach { case (k, v) =>
+ dic = dic + (v -> k)
+ }
+ dic
+ }
+
+ // make input from char
+ def makeInput(char: Char, vocab: Map[String, Int], arr: NDArray): Unit = {
+ val idx = vocab(s"$char")
+ val tmp = NDArray.zeros(1)
+ tmp.set(idx)
+ arr.set(tmp)
+ }
+
+ // helper function for random sample
+ def cdf(weights: Array[Float]): Array[Float] = {
+ val total = weights.sum
+ var result = Array[Float]()
+ var cumsum = 0f
+ for (w <- weights) {
+ cumsum += w
+ result = result :+ (cumsum / total)
+ }
+ result
+ }
+
+ def choice(population: Array[String], weights: Array[Float]): String = {
+ assert(population.length == weights.length)
+ val cdfVals = cdf(weights)
+ val x = Random.nextFloat()
+ var idx = 0
+ var found = false
+ for (i <- 0 until cdfVals.length) {
+ if (cdfVals(i) >= x && !found) {
+ idx = i
+ found = true
+ }
+ }
+ population(idx)
+ }
+
+ // we can use random output or fixed output by choosing largest probability
+ def makeOutput(prob: Array[Float], vocab: Map[Int, String],
+ sample: Boolean = false, temperature: Float = 1f): String = {
+ var idx = -1
+ val char = if (sample == false) {
+ idx = ((-1f, -1) /: prob.zipWithIndex) { (max, elem) =>
+ if (max._1 < elem._1) elem else max
+ }._2
+ if (vocab.contains(idx)) vocab(idx)
+ else ""
+ } else {
+ val fixDict = Array("") ++ (1 until vocab.size + 1).map(i => vocab(i))
+ var scaleProb = prob.map(x => if (x < 1e-6) 1e-6 else if (x > 1 - 1e-6) 1 - 1e-6 else x)
+ var rescale = scaleProb.map(x => Math.exp(Math.log(x) / temperature).toFloat)
+ val sum = rescale.sum.toFloat
+ rescale = rescale.map(_ / sum)
+ choice(fixDict, rescale)
+ }
+ char
+ }
+}
diff --git a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
index 5e00481ae416..394176487172 100644
--- a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
+++ b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc
@@ -139,6 +139,54 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_mxnet_LibInfo_mxFuncInvoke
return ret;
}
+JNIEXPORT jint JNICALL Java_ml_dmlc_mxnet_LibInfo_mxFuncInvokeEx
+ (JNIEnv *env, jobject obj, jlong funcPtr, jlongArray useVars,
+ jfloatArray scalarArgs, jlongArray mutateVars,
+ jint numParams, jobjectArray paramKeys, jobjectArray paramVals) {
+ jlong *cUseVars = env->GetLongArrayElements(useVars, NULL);
+ jfloat *cScalarArgs = env->GetFloatArrayElements(scalarArgs, NULL);
+ jlong *cMutateVars = env->GetLongArrayElements(mutateVars, NULL);
+ jbyte **cParamKeys = NULL;
+ jbyte **cParamVals = NULL;
+ if (numParams > 0) {
+ cParamKeys = new jbyte *[numParams];
+ cParamVals = new jbyte *[numParams];
+ for (size_t i = 0; i < numParams; i++) {
+ jbyteArray jkey = reinterpret_cast(env->GetObjectArrayElement(paramKeys, i));
+ jbyte *cParamKey = env->GetByteArrayElements(jkey, NULL);
+ cParamKeys[i] = cParamKey;
+ env->DeleteLocalRef(jkey);
+ jbyteArray jval = reinterpret_cast(env->GetObjectArrayElement(paramVals, i));
+ jbyte *cParamVal = env->GetByteArrayElements(jval, NULL);
+ cParamVals[i] = cParamVal;
+ env->DeleteLocalRef(jval);
+ }
+ }
+ int ret = MXFuncInvokeEx(reinterpret_cast(funcPtr),
+ reinterpret_cast(cUseVars),
+ reinterpret_cast(cScalarArgs),
+ reinterpret_cast(cMutateVars),
+ static_cast(numParams),
+ reinterpret_cast(cParamKeys),
+ reinterpret_cast(cParamVals));
+ env->ReleaseLongArrayElements(useVars, cUseVars, 0);
+ env->ReleaseFloatArrayElements(scalarArgs, cScalarArgs, 0);
+ env->ReleaseLongArrayElements(mutateVars, cMutateVars, 0);
+ if (numParams > 0) {
+ for (size_t i = 0; i < numParams; i++) {
+ jbyteArray jkey = reinterpret_cast(env->GetObjectArrayElement(paramKeys, i));
+ env->ReleaseByteArrayElements(jkey, cParamKeys[i], 0);
+ env->DeleteLocalRef(jkey);
+ jbyteArray jval = reinterpret_cast(env->GetObjectArrayElement(paramVals, i));
+ env->ReleaseByteArrayElements(jval, cParamVals[i], 0);
+ env->DeleteLocalRef(jval);
+ }
+ delete[] cParamKeys;
+ delete[] cParamVals;
+ }
+ return ret;
+}
+
JNIEXPORT jint JNICALL Java_ml_dmlc_mxnet_LibInfo_mxNDArraySaveRawBytes
(JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject dataBuf) {
size_t length;
diff --git a/src/common/mxrtc.cc b/src/common/mxrtc.cc
index 4fd687267409..c1ab065db627 100644
--- a/src/common/mxrtc.cc
+++ b/src/common/mxrtc.cc
@@ -7,7 +7,7 @@
#include
#if ((MXNET_USE_CUDA) && (MXNET_USE_NVRTC))
namespace mxnet {
-const std::string MXRtc::str_type = "float";
+const char MXRtc::str_type[] = "float";
std::unordered_map<std::string, char*> MXRtc::kernel_registry;
MXRtc::MXRtc(const std::string& name,
@@ -79,36 +79,36 @@ std::string MXRtc::decorate(const std::string& name,
std::vector > const& output,
const std::string kernel) {
std::string source;
- source += "\nextern \"C\" __global__ void " + name + "(";
+ source = source + "\nextern \"C\" __global__ void " + name + "(";
for (auto &i : input) {
- source += "const " + str_type + "* " + i.first + ",";
+ source = source + "const " + str_type + "* " + i.first + ",";
}
for (auto &i : output) {
- source += str_type + "* " + i.first + ",";
+ source = source + str_type + "* " + i.first + ",";
}
source.pop_back();
source = source + ") {\n";
for (auto &i : input) {
- source += "const int " + i.first + "_ndim = " +
+ source = source + "const int " + i.first + "_ndim = " +
std::to_string(i.second.shape().ndim()) + ";\n";
- source += "const int " + i.first + "_dims[] = {";
+ source = source + "const int " + i.first + "_dims[] = {";
for (index_t j = 0; j < i.second.shape().ndim(); ++j) {
- source += std::to_string(i.second.shape()[j]) + ",";
+ source = source + std::to_string(i.second.shape()[j]) + ",";
}
source.pop_back();
- source += "};\n";
+ source = source + "};\n";
}
for (auto &i : output) {
- source += "const int " + i.first + "_ndim = " +
+ source = source + "const int " + i.first + "_ndim = " +
std::to_string(i.second.shape().ndim()) + ";\n";
- source += "const int " + i.first + "_dims[] = {";
+ source = source + "const int " + i.first + "_dims[] = {";
for (index_t j = 0; j < i.second.shape().ndim(); ++j) {
- source += std::to_string(i.second.shape()[j]) + ",";
+ source = source + std::to_string(i.second.shape()[j]) + ",";
}
source.pop_back();
- source += "};\n";
+ source = source + "};\n";
}
- source += kernel + "\n}\n";
+ source = source + kernel + "\n}\n";
return source;
}
diff --git a/src/kvstore/kvstore.cc b/src/kvstore/kvstore.cc
index 0de025ba9a35..95000fccae29 100644
--- a/src/kvstore/kvstore.cc
+++ b/src/kvstore/kvstore.cc
@@ -23,14 +23,16 @@ KVStore* KVStore::Create(const char *type_name) {
tname == "local_allreduce_cpu") {
kv = new kvstore::KVStoreLocal();
} else if (tname == "device" ||
+ tname == "local_update_device" ||
tname == "local_allreduce_device") {
- tname = "local_allreduce_device";
- kv = new kvstore::KVStoreDevice();
+ kv = new kvstore::KVStoreDevice(true);
} else if (tname == "dist_async" ||
tname == "dist_sync" ||
+ tname == "dist_sync_device" ||
tname == "dist") {
#if MXNET_USE_DIST_KVSTORE
- kv = new kvstore::KVStoreDist();
+ kv = new kvstore::KVStoreDist(
+ tname.find("device") != std::string::npos);
if (tname == "dist_sync" &&
kv->IsWorkerNode() &&
kv->get_rank() == 0) {
diff --git a/src/kvstore/kvstore_device.h b/src/kvstore/kvstore_device.h
index 2667df523272..82c04f9ec337 100644
--- a/src/kvstore/kvstore_device.h
+++ b/src/kvstore/kvstore_device.h
@@ -21,6 +21,10 @@ namespace kvstore {
* \brief Device implementation of KVStore that do reduction on GPU reduction.
*/
class KVStoreDevice : public KVStoreLocal {
+ public:
+ explicit KVStoreDevice(bool device_mode)
+ : device_mode_(device_mode) {}
+
protected:
+  using KeyShape = std::pair<int, TShape>;
void Init(const std::vector& keys,
@@ -57,27 +61,27 @@ class KVStoreDevice : public KVStoreLocal {
}
}
- tm_buf.merged = NDArray(s, tm_buf.ctx);
+ tm_buf.merged = NDArray(s, Context::CPUPinned(tm_buf.ctx.dev_id));
+ tm_buf.merged_device = NDArray(s, tm_buf.ctx);
ctx_info[tm_buf.ctx.dev_id].second += s.Size();
}
}
const NDArray& MergePushValue(
int key, const std::vector& val, int priority) override {
- if (updater_ != nullptr) {
- // fall back to CPU based update if updater presents
+ if (!device_mode_) {
return KVStoreLocal::MergePushValue(key, val, priority);
}
-
- if (merge_buf_.empty()) {
+ if (!buf_initialized_) {
InitMergeBuffers(val);
+ buf_initialized_ = true;
}
auto& buf = merge_buf_[key];
std::vector reduce(val.size());
- CHECK(!buf.merged.is_none());
- CopyFromTo(val[0], &(buf.merged), priority);
- reduce[0] = buf.merged;
+ CHECK(!buf.merged_device.is_none());
+ CopyFromTo(val[0], &(buf.merged_device), priority);
+ reduce[0] = buf.merged_device;
for (size_t i = 1; i < val.size(); ++i) {
NDArray *copy_buf = buf.AllocCopyBuf(
@@ -85,11 +89,45 @@ class KVStoreDevice : public KVStoreLocal {
CopyFromTo(val[i], copy_buf, priority);
reduce[i] = *copy_buf;
}
- ElementwiseSum(reduce, &buf.merged);
- return buf.merged;
+ ElementwiseSum(reduce, &buf.merged_device);
+
+ if (updater_ != nullptr) {
+ CopyFromTo(buf.merged_device, &(buf.merged));
+ return buf.merged;
+ } else {
+ return buf.merged_device;
+ }
+ }
+
+ void ScatterPullValue(
+ int key,
+ const NDArray& src,
+ const std::vector& vals,
+ int priority) override {
+ if (!device_mode_) {
+ KVStoreLocal::ScatterPullValue(key, src, vals, priority);
+ return;
+ }
+ auto it = merge_buf_.find(key);
+ if (it != merge_buf_.end() && it->first == key) {
+ auto& buf = it->second;
+ if (!buf.merged_device.is_none()) {
+ CopyFromTo(src, &(buf.merged_device));
+ for (auto* vptr : vals) {
+ CopyFromTo(buf.merged_device, vptr, priority);
+ }
+ return;
+ }
+ }
+ // default, copy back
+ for (auto* vptr : vals) {
+ CopyFromTo(src, vptr, priority);
+ }
}
private:
+ bool device_mode_;
+ bool buf_initialized_{false};
std::vector sorted_key_shape_;
};
} // namespace kvstore
diff --git a/src/kvstore/kvstore_dist.h b/src/kvstore/kvstore_dist.h
index 270d85101d9f..2705effe0104 100644
--- a/src/kvstore/kvstore_dist.h
+++ b/src/kvstore/kvstore_dist.h
@@ -7,7 +7,7 @@
#define MXNET_KVSTORE_KVSTORE_DIST_H_
#include
#include
-#include "./kvstore_local.h"
+#include "./kvstore_device.h"
#include "mxnet/engine.h"
#include "ps/ps.h"
#include "./kvstore_dist_server.h"
@@ -25,9 +25,11 @@ namespace kvstore {
* it's the server node's job to control the data consistency among all
* workers. see details on \ref ServerHandle::Start
*/
-class KVStoreDist : public KVStoreLocal {
+class KVStoreDist : public KVStoreDevice {
public:
- KVStoreDist() : ps_worker_(nullptr), server_(nullptr) {
+ explicit KVStoreDist(bool device_mode)
+ : KVStoreDevice(device_mode),
+ ps_worker_(nullptr), server_(nullptr) {
if (IsWorkerNode()) {
ps_worker_ = new ps::KVWorker<real_t>(0);
ps::Start("mxnet\0");
@@ -37,6 +39,7 @@ class KVStoreDist : public KVStoreLocal {
virtual ~KVStoreDist() {
Engine::Get()->WaitForAll();
if (IsWorkerNode()) {
+ ps::Postoffice::Get()->Barrier(ps::kWorkerGroup);
if (get_rank() == 0) {
// stop the executor at servers
SendCommandToServers(kStopServer, "");
@@ -112,11 +115,11 @@ class KVStoreDist : public KVStoreLocal {
if (buf.is_none()) {
buf = NDArray(vals[0]->shape(), pinned_ctx_);
}
-    real_t* data = static_cast<real_t*>(buf.data().dptr_);
- size_t size = buf.shape().Size();
- auto pull_from_servers = [this, key, data, size](
+ auto pull_from_servers = [this, key, buf] (
RunContext rctx, Engine::CallbackOnComplete cb) {
+      real_t* data = static_cast<real_t*>(buf.data().dptr_);
+ size_t size = buf.shape().Size();
// convert to ps keys
PSKV& pskv = EncodeKey(key, size);
@@ -133,10 +136,7 @@ class KVStoreDist : public KVStoreLocal {
{buf.var()},
FnProperty::kNormal, priority);
- // copy data from buffer to vals
- for (auto v : vals) {
- CopyFromTo(buf, v);
- }
+ ScatterPullValue(key, buf, vals, priority);
}
}
@@ -267,6 +267,8 @@ class KVStoreDist : public KVStoreLocal {
return pskv;
}
+ // whether use device distributed local sync.
+ bool device_mode_;
/**
* \brief for worker to push and pull data
*/
diff --git a/src/kvstore/kvstore_local.h b/src/kvstore/kvstore_local.h
index e897f6437256..3e6ab7b5b3b0 100644
--- a/src/kvstore/kvstore_local.h
+++ b/src/kvstore/kvstore_local.h
@@ -68,15 +68,11 @@ class KVStoreLocal : public KVStore {
if (updater_ != nullptr || it == merge_buf_.end()) {
auto it = local_.find(key);
CHECK(it != local_.end()) << "key " << key << " has not been inited";
- const NDArray& src = it->second;
- for (auto* vptr : grouped_vals[i]) {
- CopyFromTo(src, vptr, priority);
- }
+ ScatterPullValue(
+ key, it->second, grouped_vals[i], priority);
} else {
- auto& src = it->second.merged;
- for (auto* vptr : grouped_vals[i]) {
- CopyFromTo(src, vptr, priority);
- }
+ ScatterPullValue(
+ key, it->second.merged, grouped_vals[i], priority);
}
}
}
@@ -88,6 +84,8 @@ class KVStoreLocal : public KVStore {
Context ctx;
// the merged value
NDArray merged;
+ // the merged value on device
+ NDArray merged_device;
/// \brief the cpu buffer for gpu data
std::vector copy_buf;
// allocate copy buffer, if it has not been allocated
@@ -169,6 +167,16 @@ class KVStoreLocal : public KVStore {
return buf.merged;
}
+ virtual void ScatterPullValue(
+ int key,
+ const NDArray& src,
+ const std::vector& vals,
+ int priority) {
+ for (auto* vptr : vals) {
+ CopyFromTo(src, vptr, priority);
+ }
+ }
+
/// \brief buffer for merging push value
std::unordered_map merge_buf_;
// pinned context
diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h
index fd7c1aa283d4..e6da76d90329 100644
--- a/src/operator/activation-inl.h
+++ b/src/operator/activation-inl.h
@@ -60,10 +60,6 @@ class ActivationOp : public Operator {
Tensor data = in_data[activation::kData].FlatTo2D(s);
Tensor out = out_data[activation::kOut].FlatTo2D(s);
Assign(out, req[activation::kOut], F(data));
- // Use asynchronize complete notification
- // This is only intended as an example of async ops
- if (s != NULL) s->Wait();
- ctx.async_on_complete();
}
virtual void Backward(const OpContext &ctx,
@@ -83,16 +79,6 @@ class ActivationOp : public Operator {
Tensor m_out_data = out_data[activation::kOut].FlatTo2D(s);
Tensor m_in_grad = in_grad[activation::kData].FlatTo2D(s);
Assign(m_in_grad, req[activation::kData], F(m_out_data) * m_out_grad);
- // Use asynchronize complete notification
- // This is only intended as an example of async ops
- if (s != NULL) s->Wait();
- ctx.async_on_complete();
- }
-
- virtual ExecType exec_type() const {
- // Use asynchronize complete notification
- // This is only intended as an example of async ops
- return kAsync;
}
}; // class ActivationOp
diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h
index e8c5502d86af..03238b067ea3 100644
--- a/src/operator/batch_norm-inl.h
+++ b/src/operator/batch_norm-inl.h
@@ -88,6 +88,9 @@ class BatchNormOp : public Operator {
Tensor<xpu, 1> bias = in_data[batchnorm::kBeta].get<xpu, 1, real_t>(s);
Tensor<xpu, 1> moving_mean = aux_states[batchnorm::kMovingMean].get<xpu, 1, real_t>(s);
Tensor<xpu, 1> moving_var = aux_states[batchnorm::kMovingVar].get<xpu, 1, real_t>(s);
+
+ if (ctx.is_train && param_.fix_gamma) slope = 1.f;
+
// whether use global statistics
if (ctx.is_train && !param_.use_global_stats) {
Tensor mean = out_data[batchnorm::kMean].get(s);
@@ -98,16 +101,10 @@ class BatchNormOp : public Operator {
mean = scale * sumall_except_dim<1>(data);
var = scale * sumall_except_dim<1>(F(
data - broadcast<1>(mean, data.shape_)));
- if (param_.fix_gamma) {
- Assign(out, req[batchnorm::kOut], (data - broadcast<1>(mean, data.shape_)) /
- F(broadcast<1>(var + param_.eps, data.shape_)) +
- broadcast<1>(bias, out.shape_));
- } else {
- Assign(out, req[batchnorm::kOut], broadcast<1>(slope, out.shape_) *
- (data - broadcast<1>(mean, data.shape_)) /
- F(broadcast<1>(var + param_.eps, data.shape_)) +
- broadcast<1>(bias, out.shape_));
- }
+ Assign(out, req[batchnorm::kOut], broadcast<1>(slope, out.shape_) *
+ (data - broadcast<1>(mean, data.shape_)) /
+ F(broadcast<1>(var + param_.eps, data.shape_)) +
+ broadcast<1>(bias, out.shape_));
} else {
Assign(out, req[batchnorm::kOut], broadcast<1>(slope /
F(moving_var + param_.eps),
@@ -183,19 +180,15 @@ class BatchNormOp : public Operator {
sumall_except_dim<1>(
grad * (data - broadcast<1>(mean, data.shape_)) /
F(broadcast<1>(var + param_.eps, data.shape_))));
- Assign(grad_in, req[batchnorm::kData],
- (grad * broadcast<1>(slope, data.shape_)) *
- broadcast<1>(1.0f / F(var + param_.eps), data.shape_) +
- broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean,
- data.shape_)) +
- broadcast<1>(gmean, data.shape_) * scale);
} else {
- Assign(grad_in, req[batchnorm::kData], grad *
- broadcast<1>(1.0f / F(var + param_.eps), data.shape_) +
- broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean,
- data.shape_)) +
- broadcast<1>(gmean, data.shape_) * scale);
+ Assign(gslope, req[batchnorm::kGamma], 0.0f);
}
+ Assign(grad_in, req[batchnorm::kData],
+ (grad * broadcast<1>(slope, data.shape_)) *
+ broadcast<1>(1.0f / F(var + param_.eps), data.shape_) +
+ broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean,
+ data.shape_)) +
+ broadcast<1>(gmean, data.shape_) * scale);
Assign(gbias, req[batchnorm::kBeta], sumall_except_dim<1>(grad));
} else {
// use global statistics with freeze moving mean and var.
@@ -204,14 +197,12 @@ class BatchNormOp : public Operator {
sumall_except_dim<1>(
grad * (data - broadcast<1>(moving_mean, data.shape_)) /
F(broadcast<1>(moving_var + param_.eps, data.shape_))));
- Assign(grad_in, req[batchnorm::kData], (grad * broadcast<1>(slope, data.shape_)) *
- broadcast<1>(
- 1.0f / F(moving_var + param_.eps), data.shape_));
} else {
- Assign(grad_in, req[batchnorm::kData], grad *
- broadcast<1>(
- 1.0f / F(moving_var + param_.eps), data.shape_));
+ Assign(gslope, req[batchnorm::kGamma], 0.0f);
}
+ Assign(grad_in, req[batchnorm::kData], (grad * broadcast<1>(slope, data.shape_)) *
+ broadcast<1>(
+ 1.0f / F(moving_var + param_.eps), data.shape_));
}
}
diff --git a/src/operator/block_grad-inl.h b/src/operator/block_grad-inl.h
index ff5262d4e04a..eaf39ce59ac4 100644
--- a/src/operator/block_grad-inl.h
+++ b/src/operator/block_grad-inl.h
@@ -24,7 +24,7 @@ enum BlockGradientOpInputs {kData};
enum BlockGradientOpOutputs {kOut};
} // namespace blockgrad
-template
+template
class BlockGradientOp : public Operator {
public:
virtual void Forward(const OpContext &ctx,
@@ -37,8 +37,8 @@ class BlockGradientOp : public Operator {
CHECK_EQ(in_data.size(), 1);
CHECK_EQ(out_data.size(), 1);
Stream *s = ctx.get_stream();
-    Tensor<xpu, 2> data = in_data[blockgrad::kData].FlatTo2D<xpu, real_t>(s);
-    Tensor<xpu, 2> out = out_data[blockgrad::kOut].FlatTo2D<xpu, real_t>(s);
+    Tensor<xpu, 2, DType> data = in_data[blockgrad::kData].FlatTo2D<xpu, DType>(s);
+    Tensor<xpu, 2, DType> out = out_data[blockgrad::kOut].FlatTo2D<xpu, DType>(s);
out = F(data);
}
@@ -52,13 +52,13 @@ class BlockGradientOp : public Operator {
using namespace mshadow;
using namespace mshadow::expr;
Stream *s = ctx.get_stream();
- Tensor grad = in_grad[blockgrad::kData].FlatTo2D(s);
+ Tensor grad = in_grad[blockgrad::kData].FlatTo2D(s);
grad = 0.f;
}
}; // class BlockGradientOp
template<typename xpu>
-Operator *CreateOp();
+Operator *CreateOp(int dtype);
#if DMLC_USE_CXX11
class BlockGradientProp : public OperatorProperty {
@@ -81,6 +81,17 @@ class BlockGradientProp : public OperatorProperty {
return true;
}
+ bool InferType(std::vector *in_type,
+ std::vector *out_type,
+ std::vector *aux_type) const override {
+ CHECK_EQ(in_type->size(), 1);
+ int dtype = (*in_type)[0];
+ CHECK_NE(dtype, -1) << "Input must have specified type";
+ out_type->clear();
+ out_type->push_back(dtype);
+ return true;
+ }
+
OperatorProperty* Copy() const override {
return new BlockGradientProp();
}
@@ -102,7 +113,13 @@ class BlockGradientProp : public OperatorProperty {
return {{in_data[blockgrad::kData], out_data[blockgrad::kOut]}};
}
- Operator* CreateOperator(Context ctx) const override;
+ Operator* CreateOperator(Context ctx) const override {
+ LOG(FATAL) << "Not Implemented";
+ return NULL;
+ }
+
+ Operator* CreateOperatorEx(Context ctx, std::vector *in_shape,
+ std::vector *in_type) const override;
}; // class BlockGradientProperty
#endif // DMLC_USE_CXX11
diff --git a/src/operator/block_grad.cc b/src/operator/block_grad.cc
index 67256f79f268..764618f51622 100644
--- a/src/operator/block_grad.cc
+++ b/src/operator/block_grad.cc
@@ -9,12 +9,21 @@
namespace mxnet {
namespace op {
template<>
-Operator *CreateOp<cpu>() {
-  return new BlockGradientOp<cpu>();
+Operator *CreateOp<cpu>(int dtype) {
+  Operator *op = NULL;
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new BlockGradientOp<cpu, DType>();
+  });
+  return op;
}
-Operator *BlockGradientProp::CreateOperator(Context ctx) const {
- DO_BIND_DISPATCH(CreateOp);
+Operator *BlockGradientProp::CreateOperatorEx(Context ctx, std::vector *in_shape,
+ std::vector *in_type) const {
+ std::vector out_shape, aux_shape;
+ std::vector out_type, aux_type;
+ CHECK(InferType(in_type, &out_type, &aux_type));
+ CHECK(InferShape(in_shape, &out_shape, &aux_shape));
+ DO_BIND_DISPATCH(CreateOp, in_type->at(0));
}
MXNET_REGISTER_OP_PROPERTY(BlockGrad, BlockGradientProp)
diff --git a/src/operator/block_grad.cu b/src/operator/block_grad.cu
index 22707e940b7e..af5fc1660797 100644
--- a/src/operator/block_grad.cu
+++ b/src/operator/block_grad.cu
@@ -9,8 +9,12 @@
namespace mxnet {
namespace op {
template<>
-Operator *CreateOp<gpu>() {
-  return new BlockGradientOp<gpu>();
+Operator *CreateOp<gpu>(int dtype) {
+  Operator *op = NULL;
+  MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
+    op = new BlockGradientOp<gpu, DType>();
+  });
+  return op;
}
} // namespace op
diff --git a/src/operator/broadcast_reduce_op-inl.h b/src/operator/broadcast_reduce_op-inl.h
index f43bafbc16da..2457948c2d7f 100644
--- a/src/operator/broadcast_reduce_op-inl.h
+++ b/src/operator/broadcast_reduce_op-inl.h
@@ -9,6 +9,7 @@
#include
#include
#include "./mshadow_op.h"
+#include "./broadcast_reduce_op_common.h"
#if defined(__CUDACC__)
#define XPU gpu
@@ -21,10 +22,11 @@ namespace op {
struct ReduceAxisParam : public dmlc::Parameter<ReduceAxisParam> {
bool keepdims;
- int axis;
+ TShape axis;
DMLC_DECLARE_PARAMETER(ReduceAxisParam) {
- DMLC_DECLARE_FIELD(axis).set_default(-1).set_lower_bound(-1)
- .describe("The axis to perform the reduction. axis=-1 means to reduce all dimensions");
+ DMLC_DECLARE_FIELD(axis).set_default(TShape())
+ .describe("Same as Numpy. The axes to perform the reduction."
+ "If left empty, a global reduction will be performed.");
DMLC_DECLARE_FIELD(keepdims).set_default(false)
.describe("Same as Numpy. If keepdims is set to true, "
"the axis which is reduced is left in the result as dimension with size one.");
@@ -32,13 +34,24 @@ struct ReduceAxisParam : public dmlc::Parameter {
};
struct BroadcastAxisParam : public dmlc::Parameter<BroadcastAxisParam> {
- int axis;
- int size;
+ TShape axis;
+ TShape size;
DMLC_DECLARE_PARAMETER(BroadcastAxisParam) {
- DMLC_DECLARE_FIELD(axis).set_default(0).set_lower_bound(0)
- .describe("The target axis of broadcasting.");
- DMLC_DECLARE_FIELD(size).set_default(0).set_lower_bound(1)
- .describe("Size of the broadcasting axis.");
+ DMLC_DECLARE_FIELD(axis).set_default(TShape())
+ .describe("The axes to perform the broadcasting.");
+ DMLC_DECLARE_FIELD(size).set_default(TShape())
+ .describe("Target sizes of the broadcasting axes.");
+ }
+};
+
+struct BroadcastToParam : public dmlc::Parameter<BroadcastToParam> {
+ TShape shape;
+ DMLC_DECLARE_PARAMETER(BroadcastToParam) {
+ DMLC_DECLARE_FIELD(shape).set_default(TShape())
+ .describe("The shape of the desired array."
+ " We can set the dim to zero if it's same as the original."
+ " E.g `A = broadcast_to(B, shape=(10, 0, 0))` "
+ "has the same meaning as `A = broadcast_axis(B, axis=0, size=10)`.");
}
};
@@ -46,26 +59,24 @@ inline TShape ReduceAxisShape(const TShape& ishape,
const EnvArguments& env) {
ReduceAxisParam param;
param.Init(env.kwargs);
- CHECK(param.axis < static_cast(ishape.ndim()) || -1 == param.axis) <<
- "axis must be smaller than the source ndim or equal to -1! Received axis=" <<
- param.axis << ", src_ndim=" << ishape.ndim();
- if (param.axis == -1 || (1 == ishape.ndim())) {
- if (param.keepdims) {
- return TShape(ishape.ndim());
- } else {
- return TShape(1);
+ std::vector axes = ParseAxes_(param.axis, ishape.ndim());
+ if (axes.size() == 0) {
+ for (index_t i = 0; i < ishape.ndim(); ++i) {
+ axes.push_back(i);
}
}
std::vector shape;
for (index_t i = 0; i < ishape.ndim(); ++i) {
- if (static_cast(i) == param.axis) {
- if (param.keepdims) {
- shape.push_back(1);
- }
- } else {
+ if (!std::binary_search(axes.begin(), axes.end(), i)) {
shape.push_back(ishape[i]);
+ } else if (param.keepdims) {
+ shape.push_back(1);
}
}
+ // We need to treat the global reduction case specially to avoid an empty output TShape.
+ if (shape.size() == 0) {
+ shape.push_back(1);
+ }
return TShape(shape.begin(), shape.end());
}
@@ -73,20 +84,32 @@ inline TShape BroadcastAxisShape(const TShape& ishape,
const EnvArguments& env) {
BroadcastAxisParam param;
param.Init(env.kwargs);
- CHECK(param.axis < static_cast(ishape.ndim())) <<
- "axis must be smaller than the source ndim" << param.axis << ", src_ndim=" << ishape.ndim();
- CHECK_EQ(ishape[param.axis], 1) <<
- "Size of the broadcasting axis in the source must be 1, axis=" << param.axis
- << ", size=" << ishape[param.axis];
- std::vector shape;
- for (index_t i = 0; i < ishape.ndim(); ++i) {
- if (static_cast(i) != param.axis) {
- shape.push_back(ishape[i]);
- } else {
- shape.push_back(param.size);
+ CHECK_EQ(param.axis.ndim(), param.size.ndim());
+ TShape ret = ishape;
+ for (index_t i = 0; i < param.axis.ndim(); i++) {
+ CHECK_EQ(ishape[param.axis[i]], 1) <<
+ "Size of the broadcasting axis in the source must be 1, axis=" << param.axis
+ << ", size=" << param.size;
+ ret[param.axis[i]] = param.size[i];
+ }
+ return ret;
+}
+
+inline TShape BroadcastToShape(const TShape& ishape,
+ const EnvArguments& env) {
+ BroadcastToParam param;
+ param.Init(env.kwargs);
+ CHECK_EQ(param.shape.ndim(), ishape.ndim());
+ TShape ret = ishape;
+ for (index_t i = 0; i < param.shape.ndim(); i++) {
+ if (param.shape[i] > 0 && (param.shape[i] != ishape[i])) {
+ CHECK_EQ(ishape[i], 1) <<
+ "Size of the broadcasting axis in the source must be 1, src_shape=" << ishape
+ << ", broadcast_to=" << param.shape;
+ ret[i] = param.shape[i];
}
}
- return TShape(shape.begin(), shape.end());
+ return ret;
}
// return a shape of scalar
@@ -103,47 +126,17 @@ void L2Norm(const TBlob &src,
OpReqType req,
RunContext ctx) {
mshadow::Stream *s = ctx.get_stream();
- mshadow::Tensor out = ret->get(s);
- mshadow::Tensor in =
- src.get_with_shape(mshadow::Shape1(src.shape_.Size()), s);
- mshadow::VectorDot(out, in, in);
- out = mshadow::expr::F(out);
-}
-
-template
-void Reduce(const TBlob &src,
- const EnvArguments& env,
- TBlob *ret,
- OpReqType req,
- RunContext ctx) {
- mshadow::Stream *s = ctx.get_stream();
- mshadow::Tensor out = ret->get(s);
- mshadow::Tensor in =
- src.get_with_shape(mshadow::Shape2(1, src.shape_.Size()), s);
- out = mshadow::expr::reduce_except_dim<0, Reducer>(in);
-}
-
-// backward function that takes input value of the op
-template
-void SumBackward_(const OutputGrad& scale,
- const EnvArguments& env,
- TBlob *in_grad,
- OpReqType req,
- RunContext ctx) {
- using namespace mxnet::op;
- using namespace mshadow::expr;
- mshadow::Stream *s = ctx.get_stream();
- CHECK_EQ(in_grad->type_flag_, scale.data.type_flag_)
- << "Unary function only support input/output with the same type";
- MSHADOW_TYPE_SWITCH(in_grad->type_flag_, DType, {
- mshadow::Tensor mscale = scale.data.get(s);
- mshadow::Tensor igrad = in_grad->FlatTo2D(s);
- ASSIGN_DISPATCH(igrad, req,
- broadcast_scalar(mscale, igrad.shape_));
+ CHECK_EQ(src.type_flag_, ret->type_flag_);
+ MSHADOW_REAL_TYPE_SWITCH(src.type_flag_, DType, {
+ mshadow::Tensor out = ret->get(s);
+ mshadow::Tensor in =
+ src.get_with_shape(mshadow::Shape1(src.shape_.Size()), s);
+ mshadow::VectorDot(out, in, in);
+ ASSIGN_DISPATCH(out, req, mshadow::expr::F(out));
});
}
-template
+template
void ReduceChannel(const TBlob &src,
const EnvArguments& env,
TBlob *ret,
@@ -153,13 +146,17 @@ void ReduceChannel(const TBlob &src,
using namespace mshadow;
using namespace mshadow::expr;
Stream *s = ctx.get_stream();
- Tensor out = ret->get_with_shape(
- Shape2(src.shape_[0], src.Size()/src.shape_[0]/src.shape_[1]),
- s);
- Tensor in = src.get_with_shape(
- Shape3(src.shape_[0], src.shape_[1], src.Size()/src.shape_[0]/src.shape_[1]),
+ CHECK_EQ(src.type_flag_, ret->type_flag_);
+ MSHADOW_REAL_TYPE_SWITCH(src.type_flag_, DType, {
+ Tensor out = ret->get_with_shape(
+ Shape2(src.shape_[0], src.Size() / src.shape_[0] / src.shape_[1]),
s);
- out = reduce_with_axis(in, 1);
+ Tensor in = src.get_with_shape(
+ Shape3(src.shape_[0], src.shape_[1], src.Size() / src.shape_[0] / src.shape_[1]),
+ s);
+ CHECK(req != kAddTo) << "AddTo is not supported";
+ ASSIGN_DISPATCH(out, req, (reduce_with_axis(in, 1)));
+ });
}
// return a shape of ReduceChannel output
@@ -174,39 +171,49 @@ inline TShape ReduceChannelShape(const TShape& ishape,
}
// Reduce the given axis
-template
+template
void ReduceAxisImpl_(const TBlob &src,
- const EnvArguments& env,
- TBlob *ret,
- OpReqType req,
- RunContext ctx,
- int axis,
- bool keepdims) {
+ const EnvArguments& env,
+ TBlob *ret,
+ OpReqType req,
+ RunContext ctx,
+ TShape axes) {
+ using namespace mshadow;
using namespace mshadow::expr;
- mshadow::Stream *s = ctx.get_stream();
- if (-1 == axis) {
- // Reduce all dimensions if axis == -1
- mshadow::Tensor in =
- src.get_with_shape(mshadow::Shape2(1, src.shape_.Size()), s);
- mshadow::Tensor out =
- ret->get_with_shape(mshadow::Shape1(ret->shape_.Size()), s);
- out = mshadow::expr::reduce_except_dim<0, Reducer>(in);
+ Stream *s = ctx.get_stream();
+ CHECK_EQ(src.type_flag_, ret->type_flag_);
+ // If the axes is empty, we just need to give an identity mapping.
+ if (axes.ndim() == 0) {
+ MSHADOW_REAL_TYPE_SWITCH(src.type_flag_, DType, {
+ Tensor in = src.FlatTo2D(s);
+ Tensor out = ret->FlatTo2D(s);
+ ASSIGN_DISPATCH(out, req, F(in));
+ });
return;
}
- int trailing = 1;
- int leading = 1;
- for (int i = 0; i < src.shape_.ndim(); ++i) {
- if (i < axis) {
- leading *= src.shape_[i];
- } else if (i > axis) {
- trailing *= src.shape_[i];
+ bool is_contiguous_axes;
+ index_t reducing_size;
+ CheckContiguousAxes_(&is_contiguous_axes, &reducing_size, axes, src.shape_);
+ if (is_contiguous_axes) {
+ MSHADOW_REAL_TYPE_SWITCH(src.type_flag_, DType, {
+ Tensor in = src.FlatTo3D(axes[0], axes[axes.ndim() - 1], s);
+ Tensor out =
+ ret->get_with_shape(mshadow::Shape1(ret->Size()), s);
+ ReduceAxesAssign(out, req, TShape(1), in);
+ });
+ } else {
+ Shape padded_shape_;
+ for (index_t i = 0; i < MXNET_SPECIAL_MAX_NDIM; ++i) {
+ padded_shape_[i] = (i < src.ndim()) ? src.shape_[i] : 1;
}
+ MSHADOW_REAL_TYPE_SWITCH(src.type_flag_, DType, {
+ Tensor in =
+ src.get_with_shape