diff --git a/.gitignore b/.gitignore
index 516320555b63..bbf8acb67285 100644
--- a/.gitignore
+++ b/.gitignore
@@ -66,3 +66,5 @@ deps
 # R
 *.Rcheck
 *.rds
+*.Rproj
+.Rproj.user
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index 313c4ec2a52f..f3b3e0d4018f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,3 @@
-# disable sudo to use container based build
 sudo: false
 
 language: cpp
@@ -18,6 +17,7 @@ env:
     - TASK=cpp_test
     # run tests/python
     - TASK=python_test
+    #- TASK=r_test
     # TODO, R test, distributed test, clang, more g++ versions
@@ -27,6 +27,8 @@ matrix:
       env: TASK=lint
     - os: osx
       env: TASK=doc
+    - os: linux
+      env: TASK=r_test
 
 # dependent apt packages
 addons:
@@ -67,10 +69,13 @@ cache:
 before_cache:
   - dmlc-core/scripts/travis/travis_before_cache.sh
+
+after_failure:
+  - tests/travis/travis_after_failure.sh
 
 notifications:
 # Emails are sent to the committer's git-configured email address by default,
   email:
     on_success: change
     on_failure: always
-  slack: dmlc:NmroCzntCiWOuxUZpii40USd
+  #slack: dmlc:NmroCzntCiWOuxUZpii40USd
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 67e0b881df5b..05ce844b45f3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,9 @@ mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
 mxnet_option(USE_OPENMP "Build with Openmp support" ON)
 mxnet_option(USE_CUDNN  "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
 mxnet_option(USE_CUDA   "Build with CUDA support" ON)
+mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
+
+
 include(mshadow/cmake/mshadow.cmake)
 include(mshadow/cmake/Utils.cmake)
@@ -65,8 +68,8 @@ if(USE_CUDNN)
 endif()
 
 add_subdirectory("dmlc-core")
-if(NOT MSVC)
-  add_subdirectory("ps-lite")
+if(USE_DIST_KVSTORE)
+  add_subdirectory("ps-lite")
 endif()
 
 mxnet_source_group("Source" GLOB_RECURSE "src/*.cc")
@@ -91,6 +94,7 @@ if(USE_CUDA)
   list(APPEND SOURCE ${cuda_objs} ${cuda})
 endif()
 
+
 if(NOT MSVC)
   # Only add c++11 flags and definitions after cuda compiling
   add_definitions(-DDMLC_USE_CXX11)
@@ -102,12 +106,18 @@ endif()
 add_library(mxnet SHARED ${SOURCE})
 target_link_libraries(mxnet ${mshadow_LINKER_LIBS})
 target_link_libraries(mxnet dmlccore)
-if(NOT MSVC)
-  target_link_libraries(mxnet pslite)
-  target_link_libraries(mxnet ${pslite_LINKER_LIBS})
-endif()
+
+
+
 set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet")
+if(USE_DIST_KVSTORE)
+  add_definitions(-DMXNET_USE_DIST_KVSTORE)
+  target_link_libraries(mxnet pslite)
+  target_link_libraries(mxnet ${pslite_LINKER_LIBS})
+  include_directories(SYSTEM ${pslite_INCLUDE_DIR})
+endif()
+
 # ---[ Linter target
 if(MSVC)
   find_package(PythonInterp 2)
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 9f72042fb3ce..8299f53aa9ca 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -26,6 +26,12 @@ The committers are granted write access to the project.
   - Mingjie is the initiator, and contributes the design of the dependency engine.
 * [Chuntao Hong](https://github.com/hjk41)
   - Chuntao is the initiator and provides the initial design of the engine.
+* [Chiyuan Zhang](https://github.com/pluskid)
+  - Chiyuan is the creator of the MXNet Julia package.
+* [Qiang Kou](https://github.com/thirdwing)
+  - KK is an R ninja; he makes mxnet available for R users.
+* [Tong He](https://github.com/hetong007)
+  - Tong is the major maintainer of MXNetR; he designed the mxnet interface and wrote many of the R tutorials.
 
 ### Become a Committer
 MXNet is an open-source project and we are actively looking for new committers
@@ -39,12 +45,10 @@ List of Contributors
 --------------------
 * [Full List of Contributors](https://github.com/dmlc/mxnet/graphs/contributors)
   - To contributors: please add your name to the list when you submit a patch to the project:)
-* [Qiang Kou](https://github.com/thirdwing)
-  - KK is a R ninja, he makes mxnet available for R users.
-* [Tong He](https://github.com/hetong007)
-  - Tong is the major maintainer of MXNetR, he designs the mxnet interface and wrote many of the tutorials on R.
 * [Feng Wang](https://github.com/happynear)
   - Feng makes mxnet compatible with Windows Visual Studio.
+* [Jack Deng](https://github.com/jdeng)
+  - Jack created the amalgamation script and the Go binding for mxnet.
 * [Li Dong](https://github.com/donglixp)
 * [Piji Li](https://github.com/lipiji)
 * [Hu Shiwen](https://github.com/yajiedesign)
@@ -55,3 +59,8 @@ List of Contributors
 * [Xiaodong](https://github.com/XD-DENG)
 * [Nan Xiao](https://github.com/road2stat)
 * [Junyuan Xie](https://github.com/piiswrong)
+* [Wei Wu](https://github.com/tornadomeet)
+* [Yuan Tang](https://github.com/terrytangyuan)
+  - Yuan set up the R Travis environment to make development safer.
+* [Michaël Benesty](https://github.com/pommedeterresautee)
+  - Michaël contributed the R visualization module of mxnet.
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 27a81e75861e..f42bed506a71 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -5,10 +5,21 @@ Version: 0.5
 Date: 2015-10-02
 Author: Tianqi Chen, Qiang Kou, Tong He
 Maintainer: Qiang Kou
-Description: MXNet is a deep learning framework designed for both efficiency and flexibility. It allows you to mix the flavours of deep learning programs together to maximize the efficiency and your productivity.
+Description: MXNet is a deep learning framework designed for both efficiency
+    and flexibility. It allows you to mix the flavours of deep learning programs
+    together to maximize the efficiency and your productivity.
 License: BSD
 URL: https://github.com/dmlc/mxnet/R-package
 BugReports: https://github.com/dmlc/mxnet/issues
-Imports: methods, Rcpp (>= 0.11.1)
-Suggests: testthat
+Imports:
+    methods,
+    Rcpp (>= 0.11.1),
+    DiagrammeR,
+    data.table,
+    jsonlite,
+    magrittr,
+    stringr
+Suggests:
+    testthat
 LinkingTo: Rcpp
+RoxygenNote: 5.0.0
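[Note: the DESCRIPTION hunk above adds five new hard dependencies for the visualization code. A minimal sketch of satisfying them before loading the package when tracking this branch (all five are CRAN packages):]

    # install the new Imports declared above, then attach mxnet again
    install.packages(c("DiagrammeR", "data.table", "jsonlite",
                       "magrittr", "stringr"))
    library(mxnet)  # should now load without missing-namespace errors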
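[Note: the hunks above export elementwise NDArray math (mx.nd.exp, mx.nd.log, mx.nd.norm, alongside the retitled mx.nd.sqrt and mx.nd.square). These operate on NDArrays rather than R vectors; a small usage sketch, using only functions exported by this patch:]

    library(mxnet)
    x <- mx.nd.array(c(1, 4, 9, 16))      # a 1-d NDArray on the default context
    as.array(mx.nd.sqrt(x))               # 1 2 3 4
    as.array(mx.nd.log(mx.nd.exp(x)))     # recovers x, up to float error
    as.array(mx.nd.norm(x))               # L2 norm: an NDArray of shape (1,)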
@@ -208,6 +238,19 @@ mx.symbol.BatchNorm <- function(...) {
   mx.varg.symbol.BatchNorm(list(...))
 }
 
+#' Get output from a symbol and pass 0 gradient back
+#'
+#' @param data Symbol
+#'     Input data.
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.BlockGrad <- function(...) {
+  mx.varg.symbol.BlockGrad(list(...))
+}
+
 #' Perform a feature concat on channel dim (dim 1) over all the inputs.
 #'
 #' @param num.args int, required
@@ -238,7 +281,7 @@ mx.symbol.Concat <- function(...) {
 #' @param num.filter int (non-negative), required
 #'     convolution filter(channel) number
 #' @param num.group int (non-negative), optional, default=1
-#'     number of groups partition
+#'     Number of group partitions. This option is not supported by CuDNN; you can achieve the same effect by using SliceChannel to split into num_group groups, applying convolution to each, and concatenating the results.
 #' @param workspace long (non-negative), optional, default=512
 #'     Tmp workspace for convolution (MB)
 #' @param no.bias boolean, optional, default=False
@@ -442,6 +485,8 @@ mx.symbol.SliceChannel <- function(...) {
 #'     Input data to softmax.
 #' @param grad.scale float, optional, default=1
 #'     Scale the gradient by a float factor
+#' @param multi.output boolean, optional, default=False
+#'     If set to true, for an (n,k,x_1,..,x_n) dimensional input tensor, softmax will generate n*x_1*...*x_n outputs, each with k classes
 #' @param name string, optional
 #'     Name of the resulting symbol.
 #' @return out The result mx.symbol
@@ -450,3 +495,55 @@
 mx.symbol.Softmax <- function(...) {
   mx.varg.symbol.Softmax(list(...))
 }
+
+#' Take exp of the src
+#'
+#' @param src Symbol
+#'     Source symbolic input to the function
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.exp <- function(...) {
+  mx.varg.symbol.exp(list(...))
+}
+
+#' Take log of the src
+#'
+#' @param src Symbol
+#'     Source symbolic input to the function
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.log <- function(...) {
+  mx.varg.symbol.log(list(...))
+}
+
+#' Take sqrt of the src
+#'
+#' @param src Symbol
+#'     Source symbolic input to the function
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.sqrt <- function(...) {
+  mx.varg.symbol.sqrt(list(...))
+}
+
+#' Take square of the src
+#'
+#' @param src Symbol
+#'     Source symbolic input to the function
+#' @param name string, optional
+#'     Name of the resulting symbol.
+#' @return out The result mx.symbol
+#'
+#' @export
+mx.symbol.square <- function(...) {
+  mx.varg.symbol.square(list(...))
+}
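[Note: the symbolic wrappers just added (mx.symbol.exp/log/sqrt/square and mx.symbol.BlockGrad) only declare graph nodes; nothing is computed until the symbol is bound to an executor. A composition sketch — the positional-argument style mirrors the demos elsewhere in this patch:]

    library(mxnet)
    data <- mx.symbol.Variable("data")
    # builds sqrt(x^2) symbolically; no computation happens here
    out <- mx.symbol.sqrt(mx.symbol.square(data))
    # BlockGrad forwards its input unchanged but sends a zero gradient
    # back, which can be used to freeze part of a network during training
    frozen <- mx.symbol.BlockGrad(data = out)
    arguments(frozen)  # lists the inputs the graph still expects ("data")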
diff --git a/R-package/R/viz.graph.R b/R-package/R/viz.graph.R
new file mode 100644
index 000000000000..c02eb2d1a97b
--- /dev/null
+++ b/R-package/R/viz.graph.R
@@ -0,0 +1,158 @@
+#'
+#' Convert symbol to dot object for visualization purposes.
+#'
+#' @importFrom magrittr %>%
+#' @importFrom stringr str_extract_all
+#' @importFrom data.table data.table
+#' @importFrom data.table as.data.table
+#' @importFrom data.table :=
+#' @importFrom data.table setkey
+#' @importFrom jsonlite fromJSON
+#' @importFrom DiagrammeR create_nodes
+#' @importFrom DiagrammeR create_graph
+#' @importFrom DiagrammeR create_edges
+#' @importFrom DiagrammeR combine_edges
+#' @importFrom DiagrammeR render_graph
+#'
+#' @param model a \code{string} representing the path to a file containing the \code{JSON} of a model dump, or the actual model dump.
+#' @param graph.title a \code{string} displayed on top of the viz.
+#' @param graph.title.font.name a \code{string} representing the font to use for the title.
+#' @param graph.title.font.size a \code{numeric} representing the size of the font to use for the title.
+#' @param graph.width.px a \code{numeric} representing the size (width) of the graph, in pixels.
+#' @param graph.height.px a \code{numeric} representing the size (height) of the graph, in pixels.
+#'
+#' @return a graph object ready to be displayed with the \code{print} function.
+#'
+#' @export
+graph.viz <- function(model, graph.title = "Computation graph", graph.title.font.name = "Helvetica", graph.title.font.size = 30, graph.width.px = 500, graph.height.px = 500){
+  # generate a color code for each type of node
+  get.color <- function(type) {
+    switch(
+      EXPR = type,
+      "data" = "#8dd3c7",
+      "FullyConnected" = ,
+      "Convolution" = "#fb8072",
+      "LeakyReLU" = ,
+      "Activation" = "#ffffb3",
+      "BatchNorm" = "#bebada",
+      "Pooling" = "#80b1d3",
+      "Flatten" = ,
+      "Reshape" = ,
+      "Concat" = "#fdb462",
+      "Softmax" = "#b3de69",
+      "#fccde5" # default value
+    )
+  }
+
+  # pick a node shape for each type of node
+  get.shape <- function(type) {
+    switch(
+      EXPR = type,
+      "data" = "star",
+      # "FullyConnected" = ,
+      # "Convolution" = "#fb8072",
+      # "LeakyReLU" = ,
+      # "Activation" = "diamond",
+      # "BatchNorm" = "#bebada",
+      "Pooling" = "oval",
+      "Flatten" = ,
+      "Reshape" = ,
+      "Concat" = "invtriangle",
+      # "Softmax" = "#b3de69",
+      "box" # default value
+    )
+  }
+
+  # extract numeric IDs from a string list
+  str2tuple <- function(str) str_extract_all(str, "\\d+") %>% unlist %>% as.numeric
+
+  # generate the text content for each node
+  get.label <- function(type, mat.row) {
+    switch(
+      EXPR = type,
+      "FullyConnected" = mat.row[,param.num_hidden] %>% paste("FullyConnected", ., sep = "\n"),
+      "Convolution" = {
+        kernel.parameters <- mat.row[,param.kernel] %>% str2tuple
+        stride.parameters <- mat.row[,param.stride] %>% str2tuple
+        num_filter.parameters <- mat.row[,param.num_filter] %>% str2tuple
+        paste0("Convolution\n", kernel.parameters[1], "x", kernel.parameters[2],
+               "/", stride.parameters[1], ", ", num_filter.parameters)
+      },
+      "LeakyReLU" = ,
+      "Activation" = mat.row[,param.act_type] %>% paste0(type, "\n", .),
+      "Pooling" = {
+        pool_type.parameters <- mat.row[,param.pool_type] %>% str2tuple
+        kernel.parameters <- mat.row[,param.kernel] %>% str2tuple
+        stride.parameters <- mat.row[,param.stride] %>% str2tuple
+        paste0("Pooling\n", pool_type.parameters, "\n", kernel.parameters[1], "x",
+               kernel.parameters[2], "/", stride.parameters[1])
+      },
+      type # default value
+    )
+  }
+
+  mx.model.json <- fromJSON(model, flatten = T)
+  mx.model.nodes <- mx.model.json$nodes %>% as.data.table
+  mx.model.nodes[,id:= .I - 1]
+  setkey(mx.model.nodes, id)
+  mx.model.json$heads[1,] %>% {mx.model.nodes[id %in% .,op:=name]} # add nodes from heads (mainly data node)
+  mx.model.nodes[,color:= get.color(op), by = id] # by=id to have an execution row per row
+  mx.model.nodes[,shape:= get.shape(op), by = id] # by=id to have an execution row per row
+  mx.model.nodes[,label:= get.label(op, .SD), by = id] # by=id to have an execution row per row
+
+  nodes.to.keep <-
+    mx.model.nodes[op != "null",id] %>% unique %>% sort
+  nodes.to.remove <-
+    mx.model.nodes[,id] %>% unique %>% setdiff(nodes.to.keep) %>% sort
+
+  nodes <-
+    create_nodes(
+      nodes = mx.model.nodes[id %in% nodes.to.keep, id],
+      label = mx.model.nodes[id %in% nodes.to.keep, label],
+      type = "lower",
+      style = "filled",
+      fillcolor = mx.model.nodes[id %in% nodes.to.keep, color],
+      shape = mx.model.nodes[id %in% nodes.to.keep, shape],
+      data = mx.model.nodes[id %in% nodes.to.keep, id],
+      #fixedsize = TRUE,
+      width = "1.3",
+      height = "0.8034"
+    )
+
+  mx.model.nodes[,has.connection:= sapply(inputs, function(x)
+    length(x) > 0)]
+
+  nodes.to.insert <-
+    mx.model.nodes[id %in% nodes.to.keep &
+                     has.connection == T, .(id, inputs)]
+
+  edges <- NULL
+  for (i in 1:nrow(nodes.to.insert)) {
+    current.id <- nodes.to.insert[i, id]
+    origin <-
+      nodes.to.insert[i, inputs][[1]][,1] %>% setdiff(nodes.to.remove) %>% unique
+    destination <- rep(current.id, length(origin))
+    edges.temp <- create_edges(from = origin,
+                               to = destination,
+                               relationship = "leading_to")
+    if (is.null(edges))
+      edges <- edges.temp
+    else
+      edges <- combine_edges(edges.temp, edges)
+  }
+
+  graph <-
+    create_graph(
+      nodes_df = nodes,
+      edges_df = edges,
+      directed = TRUE,
+      # node_attrs = c("fontname = Helvetica"),
+      graph_attrs = paste0("label = \"", graph.title, "\"") %>% c(paste0("fontname = ", graph.title.font.name)) %>% c(paste0("fontsize = ", graph.title.font.size)) %>% c("labelloc = t"),
+      # node_attrs = "fontname = Helvetica",
+      edge_attrs = c("color = gray20", "arrowsize = 0.8", "arrowhead = vee")
+    )
+
+  return(render_graph(graph, width = graph.width.px, height = graph.height.px))
+}
+
+globalVariables(c("color", "shape", "label", "id", ".", "op"))
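[Note: graph.viz above turns a model's JSON dump into a DiagrammeR rendering. A usage sketch — the net uses only symbols exported by the package; that mx.symbol.save writes the JSON layout fromJSON() then parses is an assumption about the serialization format:]

    library(mxnet)
    data    <- mx.symbol.Variable("data")
    fc1     <- mx.symbol.FullyConnected(data, name = "fc1", num_hidden = 128)
    act1    <- mx.symbol.Activation(fc1, name = "relu1", act_type = "relu")
    softmax <- mx.symbol.Softmax(act1, name = "sm")
    mx.symbol.save(softmax, "net.json")  # dump the computation graph as JSON
    graph.viz("net.json")                # render; returns an htmlwidget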
diff --git a/R-package/demo/basic_convnet.R b/R-package/demo/basic_convnet.R
deleted file mode 100644
index c132a6c2f3d5..000000000000
--- a/R-package/demo/basic_convnet.R
+++ /dev/null
@@ -1,51 +0,0 @@
-require(mxnet)
-
-batch.size = 100
-data = mx.symbol.Variable("data")
-conv1= mx.symbol.Convolution(data = data, name="conv1", num_filter=32, kernel=c(3,3), stride=c(2,2))
-
-bn1 = mx.symbol.BatchNorm(data = conv1, name="bn1")
-act1 = mx.symbol.Activation(data = bn1, name="relu1", act_type="relu")
-
-mp1 = mx.symbol.Pooling(data = act1, name = "mp1", kernel=c(2,2), stride=c(2,2), pool_type="max")
-
-conv2= mx.symbol.Convolution(data = mp1, name="conv2", num_filter=32, kernel=c(3,3), stride=c(2,2))
-bn2 = mx.symbol.BatchNorm(data = conv2, name="bn2")
-act2 = mx.symbol.Activation(data = bn2, name="relu2", act_type="relu")
-
-mp2 = mx.symbol.Pooling(data = act2, name = "mp2", kernel=c(2,2), stride=c(2,2), pool_type="max")
-
-
-fl = mx.symbol.Flatten(data = mp2, name="flatten")
-fc2 = mx.symbol.FullyConnected(data = fl, name="fc2", num_hidden=10)
-softmax = mx.symbol.Softmax(data = fc2, name = "sm")
-
-dtrain = mx.varg.io.MNISTIter(list(
-  image="data/train-images-idx3-ubyte",
-  label="data/train-labels-idx1-ubyte",
-  data.shape=c(1, 28, 28),
-  batch.size=batch.size,
-  shuffle=TRUE,
-  flat=FALSE,
-  silent=0,
-  seed=10))
-
-dtest = mx.varg.io.MNISTIter(list(
-  image="data/t10k-images-idx3-ubyte",
-  label="data/t10k-labels-idx1-ubyte",
-  data.shape=c(1, 28, 28),
-  batch.size=batch.size,
-  shuffle=FALSE,
-  flat=TRUE,
-  silent=0))
-
-mx.set.seed(0)
-devices = lapply(1:2, function(i) {
-  mx.cpu(i)
-})
-model <- mx.model.FeedForward.create(softmax, X=dtrain, eval.data=dtest,
-                                     ctx=devices, num.round=1,
-                                     learning.rate=0.1, momentum=0.9,
-                                     initializer=mx.init.uniform(0.07),
-                                     batch.end.callback=mx.callback.log.train.metric(100))
-
diff --git a/R-package/demo/basic_io.R b/R-package/demo/basic_io.R
deleted file mode 100644
index 97fa94791e42..000000000000
--- a/R-package/demo/basic_io.R
+++ /dev/null
@@ -1,18 +0,0 @@
-require(mxnet)
-# To run this, run python/mxnet/test_io.py to get data first
-iter = mx.varg.io.MNISTIter(list(
-  image="data/train-images-idx3-ubyte",
-  label="data/train-labels-idx1-ubyte",
-  data.shape=c(784),
-  batch.size=3,
-  shuffle=TRUE,
-  flat=TRUE,
-  silent=0,
-  seed=10))
-
-iter$reset()
-print(iter$iter.next())
-data = iter$value()
-
-print(as.array(data$label))
-print(dim(data$data))
diff --git a/R-package/demo/basic_nn.R b/R-package/demo/basic_nn.R
deleted file mode 100644
index 36e033f04009..000000000000
--- a/R-package/demo/basic_nn.R
+++ /dev/null
@@ -1,74 +0,0 @@
-require(mxnet)
-# A basic neural net training
-# To run this, run python/mxnet/test_io.py to get data first
-
-# Network configuration
-batch.size <- 100
-data <- mx.symbol.Variable("data")
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)
-act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")
-fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
-act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu")
-fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10)
-softmax <- mx.symbol.Softmax(fc3, name = "sm")
-
-dtrain = mx.io.MNISTIter(
-  image="data/train-images-idx3-ubyte",
-  label="data/train-labels-idx1-ubyte",
-  data.shape=c(784),
-  batch.size=batch.size,
-  shuffle=TRUE,
-  flat=TRUE,
-  silent=0,
-  seed=10)
-
-accuracy <- function(label, pred) {
-  ypred = max.col(as.array(pred))
-  return(sum((as.array(label) + 1) == ypred) / length(label))
-}
-mx.set.seed(0)
-# Training parameters
-ctx <- mx.cpu()
-input.shape <- c(batch.size, 784)
-symbol <- softmax
-init <- mx.init.uniform(0.07)
-opt <- mx.opt.create("sgd", learning.rate=0.05, momentum=0.9, rescale.grad=1.0/batch.size)
-
-# Training procedure
-texec <- mx.simple.bind(symbol, ctx=ctx, data=input.shape, grad.req=TRUE)
-shapes <- lapply(texec$ref.arg.arrays, dim)
-names(shapes) <- names(texec$arg.arrays)
-arg.arrays <- mx.init.create(init, shapes, ctx)
-mx.exec.update.arg.arrays(texec, arg.arrays, match.name=TRUE)
-
-updater <- mx.opt.get.updater(opt, texec$ref.arg.arrays)
-nround <- 10
-tic <- proc.time()
-
-for (iteration in 1 : nround) {
-  nbatch <- 0
-  train.acc <- 0
-  while (dtrain$iter.next()) {
-    batch <- dtrain$value()
-    label <- batch$label
-    names(batch) <- c("data", "sm_label")
-    # copy data arguments to executor
-    mx.exec.update.arg.arrays(texec, batch, match.name=TRUE)
-    # forward pass
-    mx.exec.forward(texec, is.train=TRUE)
-    # copy prediction out
-    out.pred <- mx.nd.copyto(texec$outputs[[1]], mx.cpu())
-    # backward pass
-    mx.exec.backward(texec)
-    arg.arrays <- updater(texec$arg.arrays, texec$ref.grad.arrays)
-    mx.exec.update.arg.arrays(texec, arg.arrays, skip.null=TRUE)
-    nbatch <- nbatch + 1
-    train.acc <- train.acc + accuracy(label, out.pred)
-    if (nbatch %% 100 == 0) {
-      print(paste("Train-acc=", train.acc / nbatch))
-      print(proc.time() - tic)
-    }
-  }
-  dtrain$reset()
-  print(paste("Train-acc=", train.acc / nbatch))
-}
diff --git a/R-package/demo/basic_training.R b/R-package/demo/basic_training.R
deleted file mode 100644
index 595469db6c76..000000000000
--- a/R-package/demo/basic_training.R
+++ /dev/null
@@ -1,44 +0,0 @@
-# This is an example of training using R's array
-
-require(mxnet)
-
-# Network configuration
-batch.size <- 100
-data <- mx.symbol.Variable("data")
-fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128)
-act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu")
-fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64)
-act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu")
-fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10)
-softmax <- mx.symbol.Softmax(fc3, name = "sm")
-
-dtrain = mx.io.MNISTIter(
-  image="data/train-images-idx3-ubyte",
-  label="data/train-labels-idx1-ubyte",
-  data.shape=c(784),
-  batch.size=batch.size,
-  flat=TRUE,
-  silent=0,
-  seed=10)
-
-dtest = mx.io.MNISTIter(
-  image="data/t10k-images-idx3-ubyte",
-  label="data/t10k-labels-idx1-ubyte",
-  data.shape=c(784),
-  batch.size=batch.size,
-  shuffle=FALSE,
-  flat=TRUE,
-  silent=0)
-# X is R's array, we load from mxnet's native iter structure, but you don't have to
-X = mx.io.extract(dtrain, "data")
-y = mx.io.extract(dtrain, "label")
-
-devices = lapply(1:2, function(i) {
-  mx.cpu(i)
-})
-# create the model
-model <- mx.model.FeedForward.create(softmax, X=X, y=y,
-                                     ctx=devices, num.round=1,
-                                     learning.rate=0.1, momentum=0.9,
-                                     initializer=mx.init.uniform(0.07),
-                                     batch.end.callback=mx.callback.log.train.metric(100))
diff --git a/R-package/man/Ops.MXNDArray.Rd b/R-package/man/Ops.MXNDArray.Rd
index ec635008bedf..7a79f0a37474 100644
--- a/R-package/man/Ops.MXNDArray.Rd
+++ b/R-package/man/Ops.MXNDArray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{Ops.MXNDArray}
 \alias{Ops.MXNDArray}
diff --git a/R-package/man/arguments.Rd b/R-package/man/arguments.Rd
index 6e8b5ad18b3b..07bf02b2cfa9 100644
--- a/R-package/man/arguments.Rd
+++ b/R-package/man/arguments.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/symbol.R
 \name{arguments}
 \alias{arguments}
diff --git a/R-package/man/as.array.MXNDArray.Rd b/R-package/man/as.array.MXNDArray.Rd
index 1960ff01d198..34e635cdf804 100644
--- a/R-package/man/as.array.MXNDArray.Rd
+++ b/R-package/man/as.array.MXNDArray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{as.array.MXNDArray}
 \alias{as.array.MXNDArray}
diff --git a/R-package/man/as.matrix.MXNDArray.Rd b/R-package/man/as.matrix.MXNDArray.Rd
index 2173cf01489d..68f9afdd230b 100644
--- a/R-package/man/as.matrix.MXNDArray.Rd
+++ b/R-package/man/as.matrix.MXNDArray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{as.matrix.MXNDArray}
 \alias{as.matrix.MXNDArray}
diff --git a/R-package/man/ctx.Rd b/R-package/man/ctx.Rd
index a0b5274cb4c8..ed370a130a80 100644
--- a/R-package/man/ctx.Rd
+++ b/R-package/man/ctx.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{ctx}
 \alias{ctx}
diff --git a/R-package/man/dim.MXNDArray.Rd b/R-package/man/dim.MXNDArray.Rd
index 139268e713b8..1c68efc8b8ea 100644
--- a/R-package/man/dim.MXNDArray.Rd
+++ b/R-package/man/dim.MXNDArray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{dim.MXNDArray}
 \alias{dim.MXNDArray}
diff --git a/R-package/man/graph.viz.Rd b/R-package/man/graph.viz.Rd
new file mode 100644
index 000000000000..c9fd94a5e44f
--- /dev/null
+++ b/R-package/man/graph.viz.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/viz.graph.R
+\name{graph.viz}
+\alias{graph.viz}
+\title{Convert symbol to dot object for visualization purposes.}
+\usage{
+graph.viz(model, graph.title = "Computation graph",
+  graph.title.font.name = "Helvetica", graph.title.font.size = 30,
+  graph.width.px = 500, graph.height.px = 500)
+}
+\arguments{
+\item{model}{a \code{string} representing the path to a file containing the \code{JSON} of a model dump, or the actual model dump.}
+
+\item{graph.title}{a \code{string} displayed on top of the viz.}
+
+\item{graph.title.font.name}{a \code{string} representing the font to use for the title.}
+
+\item{graph.title.font.size}{a \code{numeric} representing the size of the font to use for the title.}
+
+\item{graph.width.px}{a \code{numeric} representing the size (width) of the graph, in pixels.}
+
+\item{graph.height.px}{a \code{numeric} representing the size (height) of the graph, in pixels.}
+}
+\value{
+a graph object ready to be displayed with the \code{print} function.
+}
+\description{
+Convert symbol to dot object for visualization purposes.
+}
+
diff --git a/R-package/man/is.mx.context.Rd b/R-package/man/is.mx.context.Rd
index 6a2874208075..a05d2f72e644 100644
--- a/R-package/man/is.mx.context.Rd
+++ b/R-package/man/is.mx.context.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/context.R
 \name{is.mx.context}
 \alias{is.mx.context}
diff --git a/R-package/man/is.mx.dataiter.Rd b/R-package/man/is.mx.dataiter.Rd
index 47cc5b0bc37e..e010af6b1984 100644
--- a/R-package/man/is.mx.dataiter.Rd
+++ b/R-package/man/is.mx.dataiter.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/io.R
 \name{is.mx.dataiter}
 \alias{is.mx.dataiter}
diff --git a/R-package/man/is.mx.ndarray.Rd b/R-package/man/is.mx.ndarray.Rd
index e3e9d5ef9a81..80994cddadc1 100644
--- a/R-package/man/is.mx.ndarray.Rd
+++ b/R-package/man/is.mx.ndarray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{is.mx.ndarray}
 \alias{is.mx.ndarray}
@@ -17,5 +17,6 @@ mat = mx.nd.array(1:10)
 is.mx.ndarray(mat)
 mat2 = 1:10
 is.mx.ndarray(mat2)
+
 }
diff --git a/R-package/man/is.mx.symbol.Rd b/R-package/man/is.mx.symbol.Rd
index adee39247584..54546c9acca6 100644
--- a/R-package/man/is.mx.symbol.Rd
+++ b/R-package/man/is.mx.symbol.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/symbol.R
 \name{is.mx.symbol}
 \alias{is.mx.symbol}
diff --git a/R-package/man/length.MXNDArray.Rd b/R-package/man/length.MXNDArray.Rd
index 79f6cc156481..059fab3d706c 100644
--- a/R-package/man/length.MXNDArray.Rd
+++ b/R-package/man/length.MXNDArray.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{length.MXNDArray}
 \alias{length.MXNDArray}
diff --git a/R-package/man/mx.apply.Rd b/R-package/man/mx.apply.Rd
index 44707a4c87fe..3bfb9ca6945e 100644
--- a/R-package/man/mx.apply.Rd
+++ b/R-package/man/mx.apply.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/symbol.R
 \name{mx.apply}
 \alias{mx.apply}
diff --git a/R-package/man/mx.callback.log.train.metric.Rd b/R-package/man/mx.callback.log.train.metric.Rd
index fb5502d8e4db..bdb2feaed8c1 100644
--- a/R-package/man/mx.callback.log.train.metric.Rd
+++ b/R-package/man/mx.callback.log.train.metric.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callback.R
 \name{mx.callback.log.train.metric}
 \alias{mx.callback.log.train.metric}
diff --git a/R-package/man/mx.callback.save.checkpoint.Rd b/R-package/man/mx.callback.save.checkpoint.Rd
index 92814aa3b041..defcce8e3d3f 100644
--- a/R-package/man/mx.callback.save.checkpoint.Rd
+++ b/R-package/man/mx.callback.save.checkpoint.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callback.R
 \name{mx.callback.save.checkpoint}
 \alias{mx.callback.save.checkpoint}
diff --git a/R-package/man/mx.cpu.Rd b/R-package/man/mx.cpu.Rd
index 4e3dcb6282c5..2c008239ddad 100644
--- a/R-package/man/mx.cpu.Rd
+++ b/R-package/man/mx.cpu.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/context.R
 \name{mx.cpu}
 \alias{mx.cpu}
diff --git a/R-package/man/mx.ctx.default.Rd b/R-package/man/mx.ctx.default.Rd
index 6f599ba4453c..95d014966c25 100644
--- a/R-package/man/mx.ctx.default.Rd
+++ b/R-package/man/mx.ctx.default.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/context.R
 \name{mx.ctx.default}
 \alias{mx.ctx.default}
@@ -7,7 +7,7 @@
 mx.ctx.default(new = NULL)
 }
 \arguments{
-\item{new,}{optional takes \code{mx.cpu()} or \code{mx.gpu(id)}, new default ctx.}
+\item{new, }{optional takes \code{mx.cpu()} or \code{mx.gpu(id)}, new default ctx.}
 }
 \value{
 The default context.
diff --git a/R-package/man/mx.exec.backward.Rd b/R-package/man/mx.exec.backward.Rd
index f4c922533160..0d62b1ff755e 100644
--- a/R-package/man/mx.exec.backward.Rd
+++ b/R-package/man/mx.exec.backward.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/executor.R
 \name{mx.exec.backward}
 \alias{mx.exec.backward}
diff --git a/R-package/man/mx.exec.forward.Rd b/R-package/man/mx.exec.forward.Rd
index 9af80853dcc7..83529ef0ec9f 100644
--- a/R-package/man/mx.exec.forward.Rd
+++ b/R-package/man/mx.exec.forward.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/executor.R
 \name{mx.exec.forward}
 \alias{mx.exec.forward}
diff --git a/R-package/man/mx.exec.update.arg.arrays.Rd b/R-package/man/mx.exec.update.arg.arrays.Rd
index b9bda6faffd9..64844961f61d 100644
--- a/R-package/man/mx.exec.update.arg.arrays.Rd
+++ b/R-package/man/mx.exec.update.arg.arrays.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/executor.R
 \name{mx.exec.update.arg.arrays}
 \alias{mx.exec.update.arg.arrays}
diff --git a/R-package/man/mx.exec.update.aux.arrays.Rd b/R-package/man/mx.exec.update.aux.arrays.Rd
index e66d3365e541..c7511904fdc1 100644
--- a/R-package/man/mx.exec.update.aux.arrays.Rd
+++ b/R-package/man/mx.exec.update.aux.arrays.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/executor.R
 \name{mx.exec.update.aux.arrays}
 \alias{mx.exec.update.aux.arrays}
diff --git a/R-package/man/mx.gpu.Rd b/R-package/man/mx.gpu.Rd
index a45710c7d278..5546073c2f07 100644
--- a/R-package/man/mx.gpu.Rd
+++ b/R-package/man/mx.gpu.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/context.R
 \name{mx.gpu}
 \alias{mx.gpu}
diff --git a/R-package/man/mx.init.create.Rd b/R-package/man/mx.init.create.Rd
index fd76a0d66a58..847128320f75 100644
--- a/R-package/man/mx.init.create.Rd
+++ b/R-package/man/mx.init.create.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/initializer.R
 \name{mx.init.create}
 \alias{mx.init.create}
diff --git a/R-package/man/mx.init.internal.default.Rd b/R-package/man/mx.init.internal.default.Rd
index 195e70106ad2..0b167f040752 100644
--- a/R-package/man/mx.init.internal.default.Rd
+++ b/R-package/man/mx.init.internal.default.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/initializer.R
 \name{mx.init.internal.default}
 \alias{mx.init.internal.default}
diff --git a/R-package/man/mx.init.normal.Rd b/R-package/man/mx.init.normal.Rd
index aea30fd79fd2..2b8fe05ffe9b 100644
--- a/R-package/man/mx.init.normal.Rd
+++ b/R-package/man/mx.init.normal.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/initializer.R
 \name{mx.init.normal}
 \alias{mx.init.normal}
diff --git a/R-package/man/mx.init.uniform.Rd b/R-package/man/mx.init.uniform.Rd
index 0b8afd52921b..3e0d6a93fe9e 100644
--- a/R-package/man/mx.init.uniform.Rd
+++ b/R-package/man/mx.init.uniform.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/initializer.R
 \name{mx.init.uniform}
 \alias{mx.init.uniform}
diff --git a/R-package/man/mx.io.ImageRecordIter.Rd b/R-package/man/mx.io.ImageRecordIter.Rd
index a1a4c6633f58..4e13ca79790a 100644
--- a/R-package/man/mx.io.ImageRecordIter.Rd
+++ b/R-package/man/mx.io.ImageRecordIter.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.io.ImageRecordIter}
 \alias{mx.io.ImageRecordIter}
diff --git a/R-package/man/mx.io.MNISTIter.Rd b/R-package/man/mx.io.MNISTIter.Rd
index 798f71797991..2e239022319e 100644
--- a/R-package/man/mx.io.MNISTIter.Rd
+++ b/R-package/man/mx.io.MNISTIter.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.io.MNISTIter}
 \alias{mx.io.MNISTIter}
diff --git a/R-package/man/mx.io.arrayiter.Rd b/R-package/man/mx.io.arrayiter.Rd
index cb0db7d4a7fa..c6651dcb12fc 100644
--- a/R-package/man/mx.io.arrayiter.Rd
+++ b/R-package/man/mx.io.arrayiter.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/io.R
 \name{mx.io.arrayiter}
 \alias{mx.io.arrayiter}
diff --git a/R-package/man/mx.io.extract.Rd b/R-package/man/mx.io.extract.Rd
index 2897d87465cf..12a778b6609c 100644
--- a/R-package/man/mx.io.extract.Rd
+++ b/R-package/man/mx.io.extract.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/io.R
 \name{mx.io.extract}
 \alias{mx.io.extract}
diff --git a/R-package/man/mx.kv.create.Rd b/R-package/man/mx.kv.create.Rd
index 4ad4d4c64b0a..2a602daea55e 100644
--- a/R-package/man/mx.kv.create.Rd
+++ b/R-package/man/mx.kv.create.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/kvstore.R
 \name{mx.kv.create}
 \alias{mx.kv.create}
diff --git a/R-package/man/mx.metric.accuracy.Rd b/R-package/man/mx.metric.accuracy.Rd
index 174d77fed8f9..afbca8357ae7 100644
--- a/R-package/man/mx.metric.accuracy.Rd
+++ b/R-package/man/mx.metric.accuracy.Rd
@@ -1,15 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/metric.R
 \docType{data}
 \name{mx.metric.accuracy}
 \alias{mx.metric.accuracy}
 \title{Accuracy metric for classification}
-\format{\preformatted{List of 3
- $ init  :function ()
- $ update:function (label, pred, state)
- $ get   :function (state)
- - attr(*, "class")= chr "mx.metric"
-}}
+\format{An object of class \code{mx.metric} of length 3.}
 \usage{
 mx.metric.accuracy
 }
diff --git a/R-package/man/mx.metric.custom.Rd b/R-package/man/mx.metric.custom.Rd
index 5671c931ca2a..eb745decef34 100644
--- a/R-package/man/mx.metric.custom.Rd
+++ b/R-package/man/mx.metric.custom.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/metric.R
 \name{mx.metric.custom}
 \alias{mx.metric.custom}
diff --git a/R-package/man/mx.metric.mae.Rd b/R-package/man/mx.metric.mae.Rd
index a98df21f7d7f..6bade0e5a8ee 100644
--- a/R-package/man/mx.metric.mae.Rd
+++ b/R-package/man/mx.metric.mae.Rd
@@ -1,15 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/metric.R
 \docType{data}
 \name{mx.metric.mae}
 \alias{mx.metric.mae}
 \title{MAE (Mean Absolute Error) metric for regression}
-\format{\preformatted{List of 3
- $ init  :function ()
- $ update:function (label, pred, state)
- $ get   :function (state)
- - attr(*, "class")= chr "mx.metric"
-}}
+\format{An object of class \code{mx.metric} of length 3.}
 \usage{
 mx.metric.mae
 }
diff --git a/R-package/man/mx.metric.rmse.Rd b/R-package/man/mx.metric.rmse.Rd
index 76b4696a910b..636dc37a8d0c 100644
--- a/R-package/man/mx.metric.rmse.Rd
+++ b/R-package/man/mx.metric.rmse.Rd
@@ -1,15 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/metric.R
 \docType{data}
 \name{mx.metric.rmse}
 \alias{mx.metric.rmse}
 \title{RMSE (Root Mean Squared Error) metric for regression}
-\format{\preformatted{List of 3
- $ init  :function ()
- $ update:function (label, pred, state)
- $ get   :function (state)
- - attr(*, "class")= chr "mx.metric"
-}}
+\format{An object of class \code{mx.metric} of length 3.}
 \usage{
 mx.metric.rmse
 }
diff --git a/R-package/man/mx.metric.rmsle.Rd b/R-package/man/mx.metric.rmsle.Rd
index 3e2737fe07b7..ffab1b3cb227 100644
--- a/R-package/man/mx.metric.rmsle.Rd
+++ b/R-package/man/mx.metric.rmsle.Rd
@@ -1,15 +1,10 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/metric.R
 \docType{data}
 \name{mx.metric.rmsle}
 \alias{mx.metric.rmsle}
 \title{RMSLE (Root Mean Squared Logarithmic Error) metric for regression}
-\format{\preformatted{List of 3
- $ init  :function ()
- $ update:function (label, pred, state)
- $ get   :function (state)
- - attr(*, "class")= chr "mx.metric"
-}}
+\format{An object of class \code{mx.metric} of length 3.}
 \usage{
 mx.metric.rmsle
 }
diff --git a/R-package/man/mx.model.FeedForward.create.Rd b/R-package/man/mx.model.FeedForward.create.Rd
index e64ad24dae44..a3a80338cfa3 100644
--- a/R-package/man/mx.model.FeedForward.create.Rd
+++ b/R-package/man/mx.model.FeedForward.create.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/model.R
 \name{mx.model.FeedForward.create}
 \alias{mx.model.FeedForward.create}
@@ -28,7 +28,7 @@ The number of iterations over training data to train the model.}
 \item{optimizer}{string, default="sgd"
 The optimization method.}
 
-\item{initializer,}{initializer object. default=mx.init.uniform(0.01)
+\item{initializer, }{initializer object. default=mx.init.uniform(0.01)
 The initialization scheme for parameters.}
 
 \item{eval.data}{mx.io.DataIter or list(data=R.array, label=R.array), optional
diff --git a/R-package/man/mx.model.load.Rd b/R-package/man/mx.model.load.Rd
index c5b8781c80e7..72a6b33514c2 100644
--- a/R-package/man/mx.model.load.Rd
+++ b/R-package/man/mx.model.load.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/model.R
 \name{mx.model.load}
 \alias{mx.model.load}
diff --git a/R-package/man/mx.model.save.Rd b/R-package/man/mx.model.save.Rd
index 0cbc724b2eb1..dbf13f653837 100644
--- a/R-package/man/mx.model.save.Rd
+++ b/R-package/man/mx.model.save.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/model.R
 \name{mx.model.save}
 \alias{mx.model.save}
diff --git a/R-package/man/mx.nd.array.Rd b/R-package/man/mx.nd.array.Rd
index 061ba0912094..95cefb4e93fd 100644
--- a/R-package/man/mx.nd.array.Rd
+++ b/R-package/man/mx.nd.array.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.array}
 \alias{mx.nd.array}
@@ -23,5 +23,6 @@ Create a new \code{mx.ndarray} that copies the content from src on ctx.
 mat = mx.nd.array(x)
 mat = 1 - mat + (2 * mat)/(mat + 0.5)
 as.array(mat)
+
 }
diff --git a/R-package/man/mx.nd.choose.element.Rd b/R-package/man/mx.nd.choose.element.0index.Rd
similarity index 52%
rename from R-package/man/mx.nd.choose.element.Rd
rename to R-package/man/mx.nd.choose.element.0index.Rd
index 19db0393ec76..bae9776cdae1 100644
--- a/R-package/man/mx.nd.choose.element.Rd
+++ b/R-package/man/mx.nd.choose.element.0index.Rd
@@ -1,8 +1,8 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
-\name{mx.nd.choose.element}
-\alias{mx.nd.choose.element}
-\title{Choose one element from each line(row for python, column for R/Julia) in lhs according to index indicated by rhs}
+\name{mx.nd.choose.element.0index}
+\alias{mx.nd.choose.element.0index}
+\title{Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses a 0-based index.}
 \arguments{
 \item{lhs}{NDArray
 Left operand to the function.}
@@ -14,6 +14,6 @@ Right operand to the function.}
 out The result mx.ndarray
 }
 \description{
-Choose one element from each line(row for python, column for R/Julia) in lhs according to index indicated by rhs
+Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses a 0-based index.
 }
diff --git a/R-package/man/mx.nd.clip.Rd b/R-package/man/mx.nd.clip.Rd
index 5256bd98f54e..8185f36fdcfa 100644
--- a/R-package/man/mx.nd.clip.Rd
+++ b/R-package/man/mx.nd.clip.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.nd.clip}
 \alias{mx.nd.clip}
diff --git a/R-package/man/mx.nd.copyto.Rd b/R-package/man/mx.nd.copyto.Rd
index 2c8d721572ab..6c3e1c0bd73e 100644
--- a/R-package/man/mx.nd.copyto.Rd
+++ b/R-package/man/mx.nd.copyto.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.copyto}
 \alias{mx.nd.copyto}
diff --git a/R-package/man/mx.nd.dot.Rd b/R-package/man/mx.nd.dot.Rd
index 40c9d8e0550c..2576d70be2c8 100644
--- a/R-package/man/mx.nd.dot.Rd
+++ b/R-package/man/mx.nd.dot.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.nd.dot}
 \alias{mx.nd.dot}
diff --git a/R-package/man/mx.nd.exp.Rd b/R-package/man/mx.nd.exp.Rd
new file mode 100644
index 000000000000..b876d8235f51
--- /dev/null
+++ b/R-package/man/mx.nd.exp.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.nd.exp}
+\alias{mx.nd.exp}
+\title{Take exp of the src}
+\arguments{
+\item{src}{NDArray
+Source input to the function}
+}
+\value{
+out The result mx.ndarray
+}
+\description{
+Take exp of the src
+}
+
diff --git a/R-package/man/mx.nd.load.Rd b/R-package/man/mx.nd.load.Rd
index 850e943eebae..0deb2e0af36d 100644
--- a/R-package/man/mx.nd.load.Rd
+++ b/R-package/man/mx.nd.load.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.load}
 \alias{mx.nd.load}
@@ -18,5 +18,6 @@ mx.nd.save(mat, 'temp.mat')
 mat2 = mx.nd.load('temp.mat')
 as.array(mat)
 as.array(mat2)
+
 }
diff --git a/R-package/man/mx.nd.log.Rd b/R-package/man/mx.nd.log.Rd
new file mode 100644
index 000000000000..f5825e55d6b2
--- /dev/null
+++ b/R-package/man/mx.nd.log.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.nd.log}
+\alias{mx.nd.log}
+\title{Take log of the src}
+\arguments{
+\item{src}{NDArray
+Source input to the function}
+}
+\value{
+out The result mx.ndarray
+}
+\description{
+Take log of the src
+}
+
diff --git a/R-package/man/mx.nd.norm.Rd b/R-package/man/mx.nd.norm.Rd
new file mode 100644
index 000000000000..7e2feb7a889f
--- /dev/null
+++ b/R-package/man/mx.nd.norm.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.nd.norm}
+\alias{mx.nd.norm}
+\title{Take the L2 norm of the src. The result will be an ndarray of shape (1,) on the same device.}
+\arguments{
+\item{src}{NDArray
+Source input to the function}
+}
+\value{
+out The result mx.ndarray
+}
+\description{
+Take the L2 norm of the src. The result will be an ndarray of shape (1,) on the same device.
+}
+
diff --git a/R-package/man/mx.nd.ones.Rd b/R-package/man/mx.nd.ones.Rd
index 2f7bc8acb290..c191c4c26578 100644
--- a/R-package/man/mx.nd.ones.Rd
+++ b/R-package/man/mx.nd.ones.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.ones}
 \alias{mx.nd.ones}
@@ -21,5 +21,6 @@ mat2 = mx.nd.ones(c(5,5))
 as.array(mat)
 mat3 = mx.nd.ones(c(3,3,3))
 as.array(mat3)
+
 }
diff --git a/R-package/man/mx.nd.save.Rd b/R-package/man/mx.nd.save.Rd
index bca981d47c96..7f20599183a4 100644
--- a/R-package/man/mx.nd.save.Rd
+++ b/R-package/man/mx.nd.save.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.save}
 \alias{mx.nd.save}
@@ -20,5 +20,6 @@ mx.nd.save(mat, 'temp.mat')
 mat2 = mx.nd.load('temp.mat')
 as.array(mat)
 as.array(mat2)
+
 }
diff --git a/R-package/man/mx.nd.sqrt.Rd b/R-package/man/mx.nd.sqrt.Rd
index af96445d89f7..2a7e3a0fe5e5 100644
--- a/R-package/man/mx.nd.sqrt.Rd
+++ b/R-package/man/mx.nd.sqrt.Rd
@@ -1,16 +1,16 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.nd.sqrt}
 \alias{mx.nd.sqrt}
-\title{Take square root of the src}
+\title{Take sqrt of the src}
 \arguments{
 \item{src}{NDArray
-Source input to the function.}
+Source input to the function}
 }
 \value{
 out The result mx.ndarray
 }
 \description{
-Take square root of the src
+Take sqrt of the src
 }
diff --git a/R-package/man/mx.nd.square.Rd b/R-package/man/mx.nd.square.Rd
index 063b1359ee6e..ea5025a8b7a0 100644
--- a/R-package/man/mx.nd.square.Rd
+++ b/R-package/man/mx.nd.square.Rd
@@ -1,11 +1,11 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.nd.square}
 \alias{mx.nd.square}
 \title{Take square of the src}
 \arguments{
 \item{src}{NDArray
-Source input to the function.}
+Source input to the function}
 }
 \value{
 out The result mx.ndarray
diff --git a/R-package/man/mx.nd.zeros.Rd b/R-package/man/mx.nd.zeros.Rd
index 6d522abbec08..3736bbbe90da 100644
--- a/R-package/man/mx.nd.zeros.Rd
+++ b/R-package/man/mx.nd.zeros.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ndarray.R
 \name{mx.nd.zeros}
 \alias{mx.nd.zeros}
@@ -21,5 +21,6 @@ mat2 = mx.nd.zeros(c(5,5))
 as.array(mat)
 mat3 = mx.nd.zeros(c(3,3,3))
 as.array(mat3)
+
 }
diff --git a/R-package/man/mx.opt.create.Rd b/R-package/man/mx.opt.create.Rd
index 813baf90454d..b1c0c07b97ac 100644
--- a/R-package/man/mx.opt.create.Rd
+++ b/R-package/man/mx.opt.create.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/optimizer.R
 \name{mx.opt.create}
 \alias{mx.opt.create}
diff --git a/R-package/man/mx.opt.get.updater.Rd b/R-package/man/mx.opt.get.updater.Rd
index db63d7cb6637..b0cb07b649c9 100644
--- a/R-package/man/mx.opt.get.updater.Rd
+++ b/R-package/man/mx.opt.get.updater.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/optimizer.R
 \name{mx.opt.get.updater}
 \alias{mx.opt.get.updater}
diff --git a/R-package/man/mx.opt.sgd.Rd b/R-package/man/mx.opt.sgd.Rd
index f81e3ad81cfe..6493c4c37176 100644
--- a/R-package/man/mx.opt.sgd.Rd
+++ b/R-package/man/mx.opt.sgd.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/optimizer.R
 \name{mx.opt.sgd}
 \alias{mx.opt.sgd}
diff --git a/R-package/man/mx.rnorm.Rd b/R-package/man/mx.rnorm.Rd
index 43a63c000394..8c87a325dd97 100644
--- a/R-package/man/mx.rnorm.Rd
+++ b/R-package/man/mx.rnorm.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/random.R
 \name{mx.rnorm}
 \alias{mx.rnorm}
@@ -13,17 +13,19 @@ mx.rnorm(shape, mean = 0, sd = 1, ctx = NULL)
 
 \item{sd}{numeric, The standard deviations.}
 
-\item{ctx,}{optional The context device of the array. mx.ctx.default() will be used in default.}
+\item{ctx, }{optional The context device of the array. mx.ctx.default() will be used in default.}
 }
 \description{
 Generate normal distribution with mean and sd.
 }
 \examples{
+
 mx.set.seed(0)
 as.array(mx.runif(2))
 # 0.5488135 0.5928446
 mx.set.seed(0)
 as.array(mx.rnorm(2))
 # 2.212206 1.163079
+
 }
diff --git a/R-package/man/mx.runif.Rd b/R-package/man/mx.runif.Rd
index bf89e502c488..565b96ce3043 100644
--- a/R-package/man/mx.runif.Rd
+++ b/R-package/man/mx.runif.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/random.R
 \name{mx.runif}
 \alias{mx.runif}
@@ -13,17 +13,19 @@ mx.runif(shape, min = 0, max = 1, ctx = NULL)
 
 \item{max}{numeric, The upper bound of distribution.}
 
-\item{ctx,}{optional The context device of the array. mx.ctx.default() will be used in default.}
+\item{ctx, }{optional The context device of the array. mx.ctx.default() will be used in default.}
 }
 \description{
 Generate uniform distribution in [low, high) with specified shape.
 }
 \examples{
+
 mx.set.seed(0)
 as.array(mx.runif(2))
 # 0.5488135 0.5928446
 mx.set.seed(0)
 as.array(mx.rnorm(2))
 # 2.212206 1.163079
+
 }
diff --git a/R-package/man/mx.set.seed.Rd b/R-package/man/mx.set.seed.Rd
index 4139f4ab9e08..4ab6e67ea7ef 100644
--- a/R-package/man/mx.set.seed.Rd
+++ b/R-package/man/mx.set.seed.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/random.R
 \name{mx.set.seed}
 \alias{mx.set.seed}
@@ -23,11 +23,13 @@ random number generations.
 It can also be quite costly to seed these PRNGs.
 So we introduced \code{mx.set.seed} for mxnet specific device random numbers.
 }
 \examples{
+
 mx.set.seed(0)
 as.array(mx.runif(2))
 # 0.5488135 0.5928446
 mx.set.seed(0)
 as.array(mx.rnorm(2))
 # 2.212206 1.163079
+
 }
diff --git a/R-package/man/mx.simple.bind.Rd b/R-package/man/mx.simple.bind.Rd
index 4745a200477a..72af44cca995 100644
--- a/R-package/man/mx.simple.bind.Rd
+++ b/R-package/man/mx.simple.bind.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/executor.R
 \name{mx.simple.bind}
 \alias{mx.simple.bind}
diff --git a/R-package/man/mx.symbol.Activation.Rd b/R-package/man/mx.symbol.Activation.Rd
index b3c2f38780ab..3fd9892faedc 100644
--- a/R-package/man/mx.symbol.Activation.Rd
+++ b/R-package/man/mx.symbol.Activation.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.symbol.Activation}
 \alias{mx.symbol.Activation}
diff --git a/R-package/man/mx.symbol.BatchNorm.Rd b/R-package/man/mx.symbol.BatchNorm.Rd
index 838e89ce0db2..2f7a984d5d97 100644
--- a/R-package/man/mx.symbol.BatchNorm.Rd
+++ b/R-package/man/mx.symbol.BatchNorm.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.symbol.BatchNorm}
 \alias{mx.symbol.BatchNorm}
diff --git a/R-package/man/mx.symbol.BlockGrad.Rd b/R-package/man/mx.symbol.BlockGrad.Rd
new file mode 100644
index 000000000000..6a7e6037e7a6
--- /dev/null
+++ b/R-package/man/mx.symbol.BlockGrad.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mxnet_generated.R
+\name{mx.symbol.BlockGrad}
+\alias{mx.symbol.BlockGrad}
+\title{Get output from a symbol and pass 0 gradient back}
+\usage{
+mx.symbol.BlockGrad(...)
+}
+\arguments{
+\item{data}{Symbol
+Input data.}
+
+\item{name}{string, optional
+Name of the resulting symbol.}
+}
+\value{
+out The result mx.symbol
+}
+\description{
+Get output from a symbol and pass 0 gradient back
+}
+
diff --git a/R-package/man/mx.symbol.Concat.Rd b/R-package/man/mx.symbol.Concat.Rd
index 8254d0fadabe..e290ede87c9a 100644
--- a/R-package/man/mx.symbol.Concat.Rd
+++ b/R-package/man/mx.symbol.Concat.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.symbol.Concat}
 \alias{mx.symbol.Concat}
diff --git a/R-package/man/mx.symbol.Convolution.Rd b/R-package/man/mx.symbol.Convolution.Rd
index ba5f0d666cf8..8914c6cbec78 100644
--- a/R-package/man/mx.symbol.Convolution.Rd
+++ b/R-package/man/mx.symbol.Convolution.Rd
@@ -1,4 +1,4 @@
-% Generated by roxygen2 (4.1.1): do not edit by hand
+% Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mxnet_generated.R
 \name{mx.symbol.Convolution}
 \alias{mx.symbol.Convolution}
@@ -29,7 +29,7 @@ pad for convolution: (y, x)}
 convolution filter(channel) number}
 
 \item{num.group}{int (non-negative), optional, default=1
-number of groups partition}
This option is not supported by CuDNN; you can achieve the same effect with SliceChannel: split the input into num_group parts, apply convolution to each, and concat the results.} \item{workspace}{long (non-negative), optional, default=512 Tmp workspace for convolution (MB)} diff --git a/R-package/man/mx.symbol.Dropout.Rd b/R-package/man/mx.symbol.Dropout.Rd index 560b580eeba1..f86e475c8417 100644 --- a/R-package/man/mx.symbol.Dropout.Rd +++ b/R-package/man/mx.symbol.Dropout.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.Dropout} \alias{mx.symbol.Dropout} diff --git a/R-package/man/mx.symbol.ElementWiseSum.Rd b/R-package/man/mx.symbol.ElementWiseSum.Rd index 601c7d03a60e..4bbdd72ef425 100644 --- a/R-package/man/mx.symbol.ElementWiseSum.Rd +++ b/R-package/man/mx.symbol.ElementWiseSum.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.ElementWiseSum} \alias{mx.symbol.ElementWiseSum} diff --git a/R-package/man/mx.symbol.Flatten.Rd b/R-package/man/mx.symbol.Flatten.Rd index de96b1a6b41e..8f0c239d67d5 100644 --- a/R-package/man/mx.symbol.Flatten.Rd +++ b/R-package/man/mx.symbol.Flatten.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.Flatten} \alias{mx.symbol.Flatten} diff --git a/R-package/man/mx.symbol.FullyConnected.Rd b/R-package/man/mx.symbol.FullyConnected.Rd index 31d87ef1cf81..fab961dd4ee1 100644 --- a/R-package/man/mx.symbol.FullyConnected.Rd +++ b/R-package/man/mx.symbol.FullyConnected.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.FullyConnected} \alias{mx.symbol.FullyConnected} diff --git a/R-package/man/mx.symbol.Group.Rd b/R-package/man/mx.symbol.Group.Rd index f46e30a13731..c3162db22188 100644 --- a/R-package/man/mx.symbol.Group.Rd +++ b/R-package/man/mx.symbol.Group.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{mx.symbol.Group} \alias{mx.symbol.Group} diff --git a/R-package/man/mx.symbol.LRN.Rd b/R-package/man/mx.symbol.LRN.Rd index 1c74dfc5bd5a..748767828b1a 100644 --- a/R-package/man/mx.symbol.LRN.Rd +++ b/R-package/man/mx.symbol.LRN.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.LRN} \alias{mx.symbol.LRN} diff --git a/R-package/man/mx.symbol.LeakyReLU.Rd b/R-package/man/mx.symbol.LeakyReLU.Rd index bb843847555a..3a91c82e2df7 100644 --- a/R-package/man/mx.symbol.LeakyReLU.Rd +++ b/R-package/man/mx.symbol.LeakyReLU.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.LeakyReLU} \alias{mx.symbol.LeakyReLU} diff --git a/R-package/man/mx.symbol.LinearRegressionOutput.Rd b/R-package/man/mx.symbol.LinearRegressionOutput.Rd index 8d00dd325d1b..4dd9faef6082 100644 --- a/R-package/man/mx.symbol.LinearRegressionOutput.Rd +++ b/R-package/man/mx.symbol.LinearRegressionOutput.Rd @@ -1,4 +1,4 @@ -%
Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.LinearRegressionOutput} \alias{mx.symbol.LinearRegressionOutput} diff --git a/R-package/man/mx.symbol.LogisticRegressionOutput.Rd b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd index 221816ea6c15..f6825519961e 100644 --- a/R-package/man/mx.symbol.LogisticRegressionOutput.Rd +++ b/R-package/man/mx.symbol.LogisticRegressionOutput.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.LogisticRegressionOutput} \alias{mx.symbol.LogisticRegressionOutput} diff --git a/R-package/man/mx.symbol.Pooling.Rd b/R-package/man/mx.symbol.Pooling.Rd index b7faf88b7a97..427c0df10ecc 100644 --- a/R-package/man/mx.symbol.Pooling.Rd +++ b/R-package/man/mx.symbol.Pooling.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.Pooling} \alias{mx.symbol.Pooling} diff --git a/R-package/man/mx.symbol.Reshape.Rd b/R-package/man/mx.symbol.Reshape.Rd index b06b74973ae3..803e5d1d4335 100644 --- a/R-package/man/mx.symbol.Reshape.Rd +++ b/R-package/man/mx.symbol.Reshape.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.Reshape} \alias{mx.symbol.Reshape} diff --git a/R-package/man/mx.symbol.SliceChannel.Rd b/R-package/man/mx.symbol.SliceChannel.Rd index 3416b6fbfb5c..93595749195c 100644 --- a/R-package/man/mx.symbol.SliceChannel.Rd +++ b/R-package/man/mx.symbol.SliceChannel.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.SliceChannel} \alias{mx.symbol.SliceChannel} diff --git a/R-package/man/mx.symbol.Softmax.Rd b/R-package/man/mx.symbol.Softmax.Rd index 0b9fd0abf869..d574270170a1 100644 --- a/R-package/man/mx.symbol.Softmax.Rd +++ b/R-package/man/mx.symbol.Softmax.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mxnet_generated.R \name{mx.symbol.Softmax} \alias{mx.symbol.Softmax} @@ -13,6 +13,9 @@ Input data to softmax.} \item{grad.scale}{float, optional, default=1 Scale the gradient by a float factor} +\item{multi.output}{boolean, optional, default=False +If set to true, for a (n,k,x_1,..,x_n) dimensional input tensor, softmax will generate n*x_1*...*x_n outputs, each with k classes} + \item{name}{string, optional Name of the resulting symbol.} } diff --git a/R-package/man/mx.symbol.Variable.Rd b/R-package/man/mx.symbol.Variable.Rd index 725e366b3e53..304609ce6ec7 100644 --- a/R-package/man/mx.symbol.Variable.Rd +++ b/R-package/man/mx.symbol.Variable.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{mx.symbol.Variable} \alias{mx.symbol.Variable} diff --git a/R-package/man/mx.symbol.exp.Rd b/R-package/man/mx.symbol.exp.Rd new file mode 100644 index 000000000000..7ae386a70573 --- /dev/null +++ b/R-package/man/mx.symbol.exp.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation
in R/mxnet_generated.R +\name{mx.symbol.exp} +\alias{mx.symbol.exp} +\title{Take exp of the src} +\usage{ +mx.symbol.exp(...) +} +\arguments{ +\item{src}{Symbol +Source symbolic input to the function} + +\item{name}{string, optional +Name of the resulting symbol.} +} +\value{ +out The result mx.symbol +} +\description{ +Take exp of the src +} + diff --git a/R-package/man/mx.symbol.infer.shape.Rd b/R-package/man/mx.symbol.infer.shape.Rd index 0494e982a304..8d965bbea078 100644 --- a/R-package/man/mx.symbol.infer.shape.Rd +++ b/R-package/man/mx.symbol.infer.shape.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{mx.symbol.infer.shape} \alias{mx.symbol.infer.shape} diff --git a/R-package/man/mx.symbol.load.Rd b/R-package/man/mx.symbol.load.Rd index 19a2f11d7329..6af053b978f6 100644 --- a/R-package/man/mx.symbol.load.Rd +++ b/R-package/man/mx.symbol.load.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{mx.symbol.load} \alias{mx.symbol.load} @@ -16,5 +16,6 @@ Load an mx.symbol object data = mx.symbol.Variable('data') mx.symbol.save(data, 'temp.symbol') data2 = mx.symbol.load('temp.symbol') + } diff --git a/R-package/man/mx.symbol.log.Rd b/R-package/man/mx.symbol.log.Rd new file mode 100644 index 000000000000..4bd3ac0f0122 --- /dev/null +++ b/R-package/man/mx.symbol.log.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mxnet_generated.R +\name{mx.symbol.log} +\alias{mx.symbol.log} +\title{Take log of the src} +\usage{ +mx.symbol.log(...) +} +\arguments{ +\item{src}{Symbol +Source symbolic input to the function} + +\item{name}{string, optional +Name of the resulting symbol.} +} +\value{ +out The result mx.symbol +} +\description{ +Take log of the src +} + diff --git a/R-package/man/mx.symbol.save.Rd b/R-package/man/mx.symbol.save.Rd index 8cc86655e055..ab6cef2d1df9 100644 --- a/R-package/man/mx.symbol.save.Rd +++ b/R-package/man/mx.symbol.save.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{mx.symbol.save} \alias{mx.symbol.save} @@ -18,5 +18,6 @@ Save an mx.symbol object data = mx.symbol.Variable('data') mx.symbol.save(data, 'temp.symbol') data2 = mx.symbol.load('temp.symbol') + } diff --git a/R-package/man/mx.symbol.sqrt.Rd b/R-package/man/mx.symbol.sqrt.Rd new file mode 100644 index 000000000000..c810c5a845fb --- /dev/null +++ b/R-package/man/mx.symbol.sqrt.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mxnet_generated.R +\name{mx.symbol.sqrt} +\alias{mx.symbol.sqrt} +\title{Take sqrt of the src} +\usage{ +mx.symbol.sqrt(...) 
+} +\arguments{ +\item{src}{Symbol +Source symbolic input to the function} + +\item{name}{string, optional +Name of the resulting symbol.} +} +\value{ +out The result mx.symbol +} +\description{ +Take sqrt of the src +} + diff --git a/R-package/man/mx.symbol.square.Rd b/R-package/man/mx.symbol.square.Rd new file mode 100644 index 000000000000..c64b4aa00590 --- /dev/null +++ b/R-package/man/mx.symbol.square.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mxnet_generated.R +\name{mx.symbol.square} +\alias{mx.symbol.square} +\title{Take square of the src} +\usage{ +mx.symbol.square(...) +} +\arguments{ +\item{src}{Symbol +Source symbolic input to the function} + +\item{name}{string, optional +Name of the resulting symbol.} +} +\value{ +out The result mx.symbol +} +\description{ +Take square of the src +} + diff --git a/R-package/man/mxnet.Rd b/R-package/man/mxnet.Rd index df05e4ae6230..c3af345122fc 100644 --- a/R-package/man/mxnet.Rd +++ b/R-package/man/mxnet.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/zzz.R \docType{package} \name{mxnet} diff --git a/R-package/man/mxnet.export.Rd b/R-package/man/mxnet.export.Rd index 69a10c5b16be..e8bdc1c26028 100644 --- a/R-package/man/mxnet.export.Rd +++ b/R-package/man/mxnet.export.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/util.R \name{mxnet.export} \alias{mxnet.export} diff --git a/R-package/man/outputs.Rd b/R-package/man/outputs.Rd index acf1fa31e98c..95f184468df8 100644 --- a/R-package/man/outputs.Rd +++ b/R-package/man/outputs.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/symbol.R \name{outputs} \alias{outputs} diff --git a/R-package/man/predict.MXFeedForwardModel.Rd b/R-package/man/predict.MXFeedForwardModel.Rd index f6fb970f102f..a9802491a307 100644 --- a/R-package/man/predict.MXFeedForwardModel.Rd +++ b/R-package/man/predict.MXFeedForwardModel.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/model.R \name{predict.MXFeedForwardModel} \alias{predict.MXFeedForwardModel} @@ -17,11 +17,11 @@ \item{array.batch.size}{The batch size used in batching. Only used when X is R's array.} \item{array.layout}{can be "auto", "colmajor", "rowmajor", (default=auto) - The layout of array. "rowmajor" is only supported for two dimensional array. - For matrix, "rowmajor" means dim(X) = c(nexample, nfeatures), - "colmajor" means dim(X) = c(nfeatures, nexample) - "auto" will auto detect the layout by match the feature size, - and will report error when X is a square matrix to ask user to explicitly specify layout.} +The layout of array. "rowmajor" is only supported for two dimensional array. +For matrix, "rowmajor" means dim(X) = c(nexample, nfeatures), +"colmajor" means dim(X) = c(nfeatures, nexample) +"auto" will auto-detect the layout by matching the feature size, + and will report an error when X is a square matrix, to ask the user to explicitly specify the layout.} } \description{ Predict the outputs given a model and dataset.
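[Editor's note] The array.layout semantics above are easy to misread, so here is a minimal Python sketch of the auto-detection rule. The detect_layout helper is hypothetical, written only for illustration, and is not the R package's actual implementation; it just shows why a square matrix forces an explicit layout:

```python
import numpy as np

def detect_layout(X, nfeatures):
    """Guess whether a 2-D array is rowmajor, dim(X) = (nexample, nfeatures),
    or colmajor, dim(X) = (nfeatures, nexample), by matching the feature axis."""
    nrow, ncol = X.shape
    if nrow == ncol:
        # Both interpretations match the feature size: refuse to guess,
        # which is exactly why predict() reports an error in this case.
        raise ValueError("X is square; specify array.layout explicitly")
    if ncol == nfeatures:
        return "rowmajor"
    if nrow == nfeatures:
        return "colmajor"
    raise ValueError("neither axis matches the model's feature size")

X = np.zeros((100, 8))      # 100 examples, 8 features
print(detect_layout(X, 8))  # -> "rowmajor"
```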
diff --git a/R-package/man/print.MXNDArray.Rd b/R-package/man/print.MXNDArray.Rd index cafcadd31992..769324f3cdf1 100644 --- a/R-package/man/print.MXNDArray.Rd +++ b/R-package/man/print.MXNDArray.Rd @@ -1,4 +1,4 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand +% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ndarray.R \name{print.MXNDArray} \alias{print.MXNDArray} diff --git a/R-package/tests/testthat/test_model.R b/R-package/tests/testthat/test_model.R new file mode 100644 index 000000000000..93784a622bbb --- /dev/null +++ b/R-package/tests/testthat/test_model.R @@ -0,0 +1,62 @@ +require(mxnet) + +context("models") + +# test_that("basic symbol operation", { +# # Network configuration +# batch.size <- 100 +# data <- mx.symbol.Variable("data") +# fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128) +# act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu") +# fc2 <- mx.symbol.FullyConnected(act1, name = "fc2", num_hidden = 64) +# act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu") +# fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10) +# softmax <- mx.symbol.Softmax(fc3, name = "sm") +# +# dtrain = mx.io.MNISTIter( +# image="data/train-images-idx3-ubyte", +# label="data/train-labels-idx1-ubyte", +# data.shape=c(784), +# batch.size=batch.size, +# shuffle=TRUE, +# flat=TRUE, +# silent=0, +# seed=10) +# +# dtest = mx.io.MNISTIter( +# image="data/t10k-images-idx3-ubyte", +# label="data/t10k-labels-idx1-ubyte", +# data.shape=c(784), +# batch.size=batch.size, +# shuffle=FALSE, +# flat=TRUE, +# silent=0) +# +# mx.set.seed(0) +# devices = lapply(1:2, function(i) { +# mx.cpu(i) +# }) +# +# # create the model +# model <- mx.model.FeedForward.create(softmax, X=dtrain, eval.data=dtest, +# ctx=devices, num.round=1, +# learning.rate=0.1, momentum=0.9, +# initializer=mx.init.uniform(0.07), +# epoch.end.callback=mx.callback.save.checkpoint("chkpt"), +# batch.end.callback=mx.callback.log.train.metric(100)) +# +# # do prediction +# pred <- predict(model, dtest) +# label <- mx.io.extract(dtest, "label") +# dataX <- mx.io.extract(dtest, "data") +# # Predict with R's array +# pred2 <- predict(model, X=dataX) +# +# accuracy <- function(label, pred) { +# ypred = max.col(t(as.array(pred))) +# return(sum((as.array(label) + 1) == ypred) / length(label)) +# } +# +# print(paste0("Finish prediction... accuracy=", accuracy(label, pred))) +# print(paste0("Finish prediction... accuracy2=", accuracy(label, pred2))) +# }) diff --git a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd index 82ad3cd4515a..b6c81dcd28fc 100644 --- a/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd +++ b/R-package/vignettes/fiveMinutesNeuralNetwork.Rmd @@ -45,8 +45,8 @@ fc1 <- mx.symbol.FullyConnected(data, num_hidden=20) act1 <- mx.symbol.Activation(fc1, act_type="tanh") fc2 <- mx.symbol.FullyConnected(act1, num_hidden=2) -# Softmax function for the output layer -softmax <- mx.symbol.Softmax(fc2) +# SoftmaxOutput means multi-class probability prediction. +softmax <- mx.symbol.SoftmaxOutput(fc2) ``` According to the comments in the code, you can see the meaning of each function and its arguments. They can be easily modified according to your need. 
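[Editor's note] For readers following the vignette from Python, the same two-layer configuration with the renamed operator looks as follows. This is only a sketch that assumes `mxnet` is importable; it mirrors the `doc/python` examples updated later in this patch:

```python
import mxnet as mx

# Two-layer perceptron matching the R vignette above.
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=20)
act1 = mx.symbol.Activation(fc1, name='tanh1', act_type='tanh')
fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=2)
# Softmax is renamed to SoftmaxOutput throughout this patch: the symbol
# couples the softmax transformation with the training loss at the output.
softmax = mx.symbol.SoftmaxOutput(fc2, name='sm')
```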
@@ -103,7 +103,7 @@ data <- mx.symbol.Variable("data") # num_hidden: number of neurons in this hidden layer fc1 <- mx.symbol.FullyConnected(data, num_hidden=1) -# Softmax function for the output layer +# Use linear regression for the output layer lro <- mx.symbol.LinearRegressionOutput(fc1) ``` diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd index 1913887426cf..d34e92adf262 100644 --- a/R-package/vignettes/mnistCompetition.Rmd +++ b/R-package/vignettes/mnistCompetition.Rmd @@ -49,7 +49,7 @@ act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu") fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=64) act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu") fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10) -softmax <- mx.symbol.Softmax(fc3, name="sm") +softmax <- mx.symbol.SoftmaxOutput(fc3, name="sm") ``` 1. In `mxnet`, we use its own data type `symbol` to configure the network. `data <- mx.symbol.Variable("data")` uses `data` to represent the input data, i.e. the input layer. @@ -128,7 +128,7 @@ tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh") # second fullc fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # loss -lenet <- mx.symbol.Softmax(data=fc2) +lenet <- mx.symbol.SoftmaxOutput(data=fc2) ``` Then let us reshape the matrices into arrays: @@ -143,7 +143,7 @@ dim(test.array) <- c(28, 28, 1, ncol(test)) Next we are going to compare the training speed on different devices, so the definition of the devices goes first: ```{r} -n.gpu <- 1 +n.gpu <- 1 device.cpu <- mx.cpu() device.gpu <- lapply(0:(n.gpu-1), function(i) { mx.gpu(i) @@ -163,7 +163,7 @@ model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, learning.rate=0.05, momentum=0.9, wd=0.00001, eval.metric=mx.metric.accuracy, epoch.end.callback=mx.callback.log.train.metric(100)) -print(proc.time() - tic) +print(proc.time() - tic) ``` Training on GPU: @@ -176,7 +176,7 @@ model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, learning.rate=0.05, momentum=0.9, wd=0.00001, eval.metric=mx.metric.accuracy, epoch.end.callback=mx.callback.log.train.metric(100)) -print(proc.time() - tic) +print(proc.time() - tic) ``` As you can see by using GPU, we can get a much faster speedup in training! diff --git a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd index 0f69d5449344..3c729664558c 100644 --- a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd +++ b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd @@ -13,7 +13,7 @@ applications. There are two major concepts introduced in this tutorial. ## NDArray: Vectorized tensor computations on CPUs and GPUs -`NDArray` is the basic vectorized operation unit in MXNet for matrix and tensor computations. +`NDArray` is the basic vectorized operation unit in MXNet for matrix and tensor computations. Users can perform usual calculations as on R's array, but with two additional features: 1. **multiple devices**: all operations can be run on various devices including @@ -67,7 +67,7 @@ d <- c / a - 5 as.array(d) ``` -If two `NDArray`s sit on different divices, we need to explicitly move them +If two `NDArray`s sit on different devices, we need to explicitly move them
For instance: ```{r, eval=FALSE} @@ -93,7 +93,7 @@ a <- mx.nd.load("temp.ndarray") as.array(a[[1]]) ``` -In case you want to save data to the distributed file system such as S3 and HDFS, +In case you want to save data to the distributed file system such as S3 and HDFS, we can directly save to and load from them. For example: ```{r,eval=FALSE} @@ -156,7 +156,7 @@ net <- mx.symbol.Variable("data") net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) net <- mx.symbol.Activation(data=net, name="relu1", act_type="relu") net <- mx.symbol.FullyConnected(data=net, name="fc2", num_hidden=64) -net <- mx.symbol.Softmax(data=net, name="out") +net <- mx.symbol.SoftmaxOutput(data=net, name="out") class(net) ``` diff --git a/README.md b/README.md index aef489ff7920..2c9d5a539b24 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,11 @@ deep learning programs together to maximize the efficiency and your productivity What's New ---------- +* [Minimum MXNet Library in One File](amalgamation) * [Training Deep Net on 14 Million Images on A Single Machine](https://mxnet-bing.readthedocs.org/en/latest/tutorial/imagenet_full.html) * [MXNet.jl Julia binding initial release](https://github.com/dmlc/MXNet.jl) * [Design Note: Squeeze the Memory Consumption of Deep Learning](http://mxnet.readthedocs.org/en/latest/developer-guide/note_memory.html) -* [LSTM Example by using symbolic API](https://github.com/dmlc/mxnet/tree/master/example/rnn) -* [MXNet R Package brings Deep learning for R!](https://github.com/dmlc/mxnet/tree/master/R-package) -* [Design Note: Dependency Engine for Deep Learning](http://mxnet.readthedocs.org/en/latest/developer-guide/note_engine.html) + Contents -------- diff --git a/amalgamation/.gitignore b/amalgamation/.gitignore new file mode 100644 index 000000000000..318284280c8a --- /dev/null +++ b/amalgamation/.gitignore @@ -0,0 +1 @@ +*-all.cc diff --git a/amalgamation/Makefile b/amalgamation/Makefile new file mode 100644 index 000000000000..def30163f109 --- /dev/null +++ b/amalgamation/Makefile @@ -0,0 +1,44 @@ +export MXNET_ROOT=`pwd`/.. 
+# Change this to the path of OpenBLAS +export OPENBLAS_ROOT=`pwd`/OpenBLAS + +# Whether to use the minimum build without BLAS and SSE; this will make the library super slow +ifndef MIN + export MIN= 0 +endif + +.PHONY: all clean + +CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall +LDFLAGS=-lrt + +ifneq ($(MIN), 1) + CFLAGS+= -I${OPENBLAS_ROOT} + LDFLAGS+=-L${OPENBLAS_ROOT} -lopenblas +endif + +all: libmxnet_predict.a ${MXNET_ROOT}/lib/libmxnet_predict.so + +mxnet_predict0.d: mxnet_predict0.cc + ${CXX} ${CFLAGS} -MD -MF $@ \ + -I ${MXNET_ROOT}/ -I ${MXNET_ROOT}/mshadow/ -I ${MXNET_ROOT}/dmlc-core/include \ + -I ${MXNET_ROOT}/include -c $+ + rm mxnet_predict0.o + +mxnet_predict-all.cc: mxnet_predict0.d mxnet_predict0.cc + @echo "Generating amalgamation to " $@ + python ./amalgamation.py $+ $@ $(MIN) + +mxnet_predict-all.o: mxnet_predict-all.cc + ${CXX} ${CFLAGS} -fPIC -o $@ -c $+ + +libmxnet_predict.a: mxnet_predict-all.o + ar rcs libmxnet_predict.a $+ + +${MXNET_ROOT}/lib/libmxnet_predict.so: mxnet_predict-all.o + @mkdir -p ${MXNET_ROOT}/lib + ${CXX} ${CFLAGS} -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS) + ls -alh $@ + +clean: + rm -f *.d *.o diff --git a/amalgamation/README.md b/amalgamation/README.md new file mode 100644 index 000000000000..c42a86981e50 --- /dev/null +++ b/amalgamation/README.md @@ -0,0 +1,23 @@ +MXNet Amalgamation +================== +This folder contains an amalgamation generation script to generate the entire mxnet library into one file. +Currently it supports generation for [predict API](../include/mxnet/c_predict_api.h), +which allows you to run prediction in a platform-independent way. + +How to Generate the Amalgamation +-------------------------------- +Typing ```make``` will generate the following files +- mxnet_predict-all.cc + - The file you can use to compile the predict API +- ../lib/libmxnet_predict.so + - The dynamic library generated for prediction. + +You can also check out the [Makefile](Makefile). + +Dependency +---------- +The only dependency is a BLAS library. + +Acknowledgement +--------------- +This module was created by [Jack Deng](https://github.com/jdeng).
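[Editor's note] To make the pipeline concrete: `make` first runs the compiler with `-MD` to record every header pulled in by `mxnet_predict0.cc` into `mxnet_predict0.d`, then invokes `python ./amalgamation.py mxnet_predict0.d mxnet_predict0.cc mxnet_predict-all.cc $(MIN)` (see the Makefile above). Below is a condensed Python sketch of the dependency-file parsing step that the full script in the next hunk performs; the extra `':'` check for the object-file target is an illustrative tweak, not part of the original script:

```python
import os.path

def get_sources(dep_file):
    """Collect project-local files named in a gcc -MD dependency file,
    skipping the target entry, line continuations and system headers."""
    sources, visited = [], set()
    for tok in open(dep_file).read().split():
        if not tok or tok == '\\' or tok.endswith('.o') or tok.endswith(':'):
            continue  # drop continuations and the "mxnet_predict0.o:" target
        path = os.path.relpath(tok)
        if '/usr/' not in path and path not in visited:
            visited.add(path)
            sources.append(path)
    return sources

# e.g. get_sources('mxnet_predict0.d')
# -> ['mxnet_predict0.cc', 'include/mxnet/c_predict_api.h', ...]
```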
diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py new file mode 100644 index 000000000000..888367b8b345 --- /dev/null +++ b/amalgamation/amalgamation.py @@ -0,0 +1,125 @@ +import sys +import os.path, re, StringIO + +blacklist = [ + 'Windows.h', 'cublas_v2.h', 'cuda/tensor_gpu-inl.cuh', + 'cuda_runtime.h', 'cudnn.h', 'cudnn_lrn-inl.h', 'curand.h', + 'glog/logging.h', 'io/azure_filesys.h', 'io/hdfs_filesys.h', 'io/s3_filesys.h', + 'kvstore_dist.h', 'mach/clock.h', 'mach/mach.h', + 'malloc.h', 'mkl.h', 'mkl_cblas.h', 'mkl_vsl.h', 'mkl_vsl_functions.h', + 'nvml.h', 'opencv2/opencv.hpp', 'sys/stat.h', 'sys/types.h' + ] + +if len(sys.argv) < 4: + print("Usage: <dep_file> <main_cc> <output_cc> [minimum=0]\n" + "Minimum means no blas, no sse, no dependency; it may run twice as slow.") + exit(0) + +minimum = int(sys.argv[4]) if len(sys.argv) > 4 else 0 + +if minimum: + blacklist += ['packet/sse-inl.h', 'emmintrin.h'] + +def get_sources(def_file): + sources = [] + files = [] + visited = set() + for line in open(def_file): + files = files + line.strip().split(' ') + + for f in files: + f = f.strip() + if not f or f.endswith('.o') or f == '\\': continue + fn = os.path.relpath(f) + if fn.find('/usr/') < 0 and fn not in visited: + sources.append(fn) + visited.add(fn) + return sources + +sources = get_sources(sys.argv[1]) + +def find_source(name, start): + candidates = [] + for x in sources: + if x == name or x.endswith('/' + name): candidates.append(x) + if not candidates: return '' + if len(candidates) == 1: return candidates[0] + for x in candidates: + if x.split('/')[1] == start.split('/')[1]: return x + return '' + + +re1 = re.compile('<([./a-zA-Z0-9_-]*)>') +re2 = re.compile('"([./a-zA-Z0-9_-]*)"') + +sysheaders = [] +history = set([]) +out = StringIO.StringIO() + +def expand(x, pending): + if x in history and x not in ['mshadow/mshadow/expr_scalar-inl.h']: # MULTIPLE includes + return + + if x in pending: + #print 'loop found: %s in ' % x, pending + return + + print >>out, "//===== EXPANDING: %s =====\n" %x + for line in open(x): + if line.find('#include') < 0: + out.write(line) + continue + if line.strip().find('#include') > 0: + print line + continue + m = re1.search(line) + if not m: m = re2.search(line) + if not m: + print line + ' not found' + continue + h = m.groups()[0].strip('./') + source = find_source(h, x) + if not source: + if h not in blacklist and h not in sysheaders: sysheaders.append(h) + else: + expand(source, pending + [x]) + print >>out, "//===== EXPANDED: %s =====\n" %x + history.add(x) + + +expand(sys.argv[2], []) + +f = open(sys.argv[3], 'wb') + +if minimum != 0: + print >>f, "#define MSHADOW_STAND_ALONE 1" + print >>f, "#define MSHADOW_USE_SSE 0" + print >>f, "#define MSHADOW_USE_CBLAS 0" + +print >>f, ''' +#if defined(__MACH__) +#include <mach/clock.h> +#include <mach/mach.h> +#endif + +#if !defined(__WIN32__) +#include <sys/stat.h> +#include <sys/types.h> + +#if !defined(__ANDROID__) && (!defined(MSHADOW_USE_SSE) || MSHADOW_USE_SSE == 1) +#include <emmintrin.h> +#endif + +#endif +''' + +for k in sorted(sysheaders): + print >>f, "#include <%s>" % k + +print >>f, '' +print >>f, out.getvalue() + +for x in sources: + if x not in history and not x.endswith('.o'): + print 'Not processed:', x + diff --git a/amalgamation/mxnet_predict0.cc b/amalgamation/mxnet_predict0.cc new file mode 100644 index 000000000000..4397308177d2 --- /dev/null +++ b/amalgamation/mxnet_predict0.cc @@ -0,0 +1,48 @@ +// mxnet.cc + +#define MSHADOW_FORCE_STREAM +#ifndef MSHADOW_USE_CBLAS +#define MSHADOW_USE_CBLAS 1 +#endif +#define MSHADOW_USE_CUDA 0 +#define MSHADOW_USE_MKL 0 +#define
MSHADOW_RABIT_PS 0 +#define MSHADOW_DIST_PS 0 + +#define MXNET_USE_OPENCV 0 +#define MXNET_PREDICT_ONLY 1 +#define DISABLE_OPENMP 1 + +#include "src/ndarray/unary_function.cc" +#include "src/ndarray/ndarray_function.cc" +#include "src/ndarray/ndarray.cc" +#include "src/engine/engine.cc" +#include "src/engine/naive_engine.cc" +#include "src/symbol/graph_executor.cc" +#include "src/symbol/static_graph.cc" +#include "src/symbol/symbol.cc" +#include "src/operator/operator.cc" +#include "src/operator/activation.cc" +#include "src/operator/batch_norm.cc" +#include "src/operator/block_grad.cc" +#include "src/operator/concat.cc" +#include "src/operator/convolution.cc" +#include "src/operator/dropout.cc" +#include "src/operator/elementwise_binary_op.cc" +#include "src/operator/elementwise_sum.cc" +#include "src/operator/fully_connected.cc" +#include "src/operator/leaky_relu.cc" +#include "src/operator/lrn.cc" +#include "src/operator/pooling.cc" +#include "src/operator/regression_output.cc" +#include "src/operator/reshape.cc" +#include "src/operator/slice_channel.cc" +#include "src/operator/softmax_output.cc" +#include "src/operator/deconvolution.cc" +#include "src/storage/storage.cc" +#include "src/common/tblob_op_registry.cc" + +#include "src/resource.cc" + +#include "src/c_api/c_predict_api.cc" +#include "src/c_api/c_api_error.cc" diff --git a/doc/R-package/fiveMinutesNeuralNetwork.md b/doc/R-package/fiveMinutesNeuralNetwork.md index a58eafa62474..e9018ecf6374 100644 --- a/doc/R-package/fiveMinutesNeuralNetwork.md +++ b/doc/R-package/fiveMinutesNeuralNetwork.md @@ -61,8 +61,8 @@ fc1 <- mx.symbol.FullyConnected(data, num_hidden=20) act1 <- mx.symbol.Activation(fc1, act_type="tanh") fc2 <- mx.symbol.FullyConnected(act1, num_hidden=2) -# Softmax function for the output layer -softmax <- mx.symbol.Softmax(fc2) +# SoftmaxOutput means multi-class probability prediction. +softmax <- mx.symbol.SoftmaxOutput(fc2) ``` According to the comments in the code, you can see the meaning of each function and its arguments. They can be easily modified according to your need. @@ -163,7 +163,7 @@ data <- mx.symbol.Variable("data") # num_hidden: number of neurons in this hidden layer fc1 <- mx.symbol.FullyConnected(data, num_hidden=1) -# Softmax function for the output layer +# Use linear regression for the output layer lro <- mx.symbol.LinearRegressionOutput(fc1) ``` diff --git a/doc/R-package/mnistCompetition.md b/doc/R-package/mnistCompetition.md index 16a7ca761146..95fff099bd04 100644 --- a/doc/R-package/mnistCompetition.md +++ b/doc/R-package/mnistCompetition.md @@ -51,7 +51,7 @@ table(train.y) ``` ## train.y -## 0 1 2 3 4 5 6 7 8 9 +## 0 1 2 3 4 5 6 7 8 9 ## 4132 4684 4177 4351 4072 3795 4137 4401 4063 4188 ``` @@ -67,7 +67,7 @@ act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu") fc2 <- mx.symbol.FullyConnected(act1, name="fc2", num_hidden=64) act2 <- mx.symbol.Activation(fc2, name="relu2", act_type="relu") fc3 <- mx.symbol.FullyConnected(act2, name="fc3", num_hidden=10) -softmax <- mx.symbol.Softmax(fc3, name="sm") +softmax <- mx.symbol.SoftmaxOutput(fc3, name="sm") ``` 1. In `mxnet`, we use its own data type `symbol` to configure the network. `data <- mx.symbol.Variable("data")` use `data` to represent the input data, i.e. the input layer. 
@@ -177,7 +177,7 @@ table(pred.label) ``` ## pred.label -## 0 1 2 3 4 5 6 7 8 9 +## 0 1 2 3 4 5 6 7 8 9 ## 2818 3195 2744 2767 2683 2596 2798 2790 2784 2825 ``` @@ -216,7 +216,7 @@ tanh3 <- mx.symbol.Activation(data=fc1, act_type="tanh") # second fullc fc2 <- mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # loss -lenet <- mx.symbol.Softmax(data=fc2) +lenet <- mx.symbol.SoftmaxOutput(data=fc2) ``` Then let us reshape the matrices into arrays: @@ -233,7 +233,7 @@ Next we are going to compare the training speed on different devices, so the def ```r -n.gpu <- 1 +n.gpu <- 1 device.cpu <- mx.cpu() device.gpu <- lapply(0:(n.gpu-1), function(i) { mx.gpu(i) @@ -266,11 +266,11 @@ model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, ``` ```r -print(proc.time() - tic) +print(proc.time() - tic) ``` ``` -## user system elapsed +## user system elapsed ## 130.030 204.976 83.821 ``` @@ -317,11 +317,11 @@ model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, ``` ```r -print(proc.time() - tic) +print(proc.time() - tic) ``` ``` -## user system elapsed +## user system elapsed ## 9.288 1.680 6.889 ``` diff --git a/doc/python/model.md b/doc/python/model.md index fca3a39ec893..f6f27c99d082 100644 --- a/doc/python/model.md +++ b/doc/python/model.md @@ -23,7 +23,7 @@ data = mx.symbol.Variable('data') fc1 = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128) act1 = mx.symbol.Activation(fc1, name='relu1', act_type='relu') fc2 = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64) -softmax = mx.symbol.Softmax(fc2, name='sm') +softmax = mx.symbol.SoftmaxOutput(fc2, name='sm') # create a model model = mx.model.FeedForward.create( softmax, diff --git a/doc/python/symbol.md b/doc/python/symbol.md index b153fdb32773..fc8a0a99b8dd 100644 --- a/doc/python/symbol.md +++ b/doc/python/symbol.md @@ -23,7 +23,7 @@ The following code gives an example of two layer neural network configuration. 
>>> net = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) >>> net = mx.symbol.Activation(data=net, name='relu1', act_type="relu") >>> net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) ->>> net = mx.symbol.Softmax(data=net, name='out') +>>> net = mx.symbol.SoftmaxOutput(data=net, name='out') >>> type(net) ``` @@ -68,7 +68,7 @@ You can use [mxnet.symbol.Group](#mxnet.symbol.Group) function to group the symb >>> fc1 = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) >>> net = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu") >>> net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) ->>> out = mx.symbol.Softmax(data=net, name='softmax') +>>> out = mx.symbol.SoftmaxOutput(data=net, name='softmax') >>> group = mx.symbol.Group([fc1, out]) >>> group.list_outputs() ['fc1_output', 'softmax_output'] @@ -102,7 +102,7 @@ Before you get started, you can check the list of functions in the following tab mxnet.symbol.LeakyReLU mxnet.symbol.Pooling mxnet.symbol.Reshape - mxnet.symbol.Softmax + mxnet.symbol.SoftmaxOutput ``` ```eval_rst diff --git a/doc/python/tutorial.md b/doc/python/tutorial.md index 09a70df07c04..7620afe0c8cb 100644 --- a/doc/python/tutorial.md +++ b/doc/python/tutorial.md @@ -227,7 +227,7 @@ The following codes create a two layer perceptrons network: >>> net = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) >>> net = mx.symbol.Activation(data=net, name='relu1', act_type="relu") >>> net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) ->>> net = mx.symbol.Softmax(data=net, name='out') +>>> net = mx.symbol.SoftmaxOutput(data=net, name='out') >>> type(net) ``` diff --git a/example/cifar10/cifar10.py b/example/cifar10/cifar10.py index efb1122504a0..533f6714bd96 100644 --- a/example/cifar10/cifar10.py +++ b/example/cifar10/cifar10.py @@ -55,7 +55,7 @@ def SimpleFactory(data, ch_1x1, ch_3x3): pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="global_pool") flatten = mx.symbol.Flatten(data=pool, name="flatten1") fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1") -softmax = mx.symbol.Softmax(data=fc, name="loss") +softmax = mx.symbol.SoftmaxOutput(data=fc, name="loss") ######################################################### diff --git a/example/imagenet/alexnet.py b/example/imagenet/alexnet.py index dbf5e9a28ba4..b933b090e5b2 100644 --- a/example/imagenet/alexnet.py +++ b/example/imagenet/alexnet.py @@ -40,7 +40,7 @@ dropout2 = mx.symbol.Dropout(data=relu7, p=0.5) # stage 6 fc3 = mx.symbol.FullyConnected(data=dropout2, num_hidden=1000) -softmax = mx.symbol.Softmax(data=fc3) +softmax = mx.symbol.SoftmaxOutput(data=fc3) ## data diff --git a/example/imagenet/inception-full.py b/example/imagenet/inception-full.py index d703a6db59a2..1ac0a5c14a68 100644 --- a/example/imagenet/inception-full.py +++ b/example/imagenet/inception-full.py @@ -74,7 +74,7 @@ def inception(nhidden, grad_scale): # linear classifier flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1') - softmax = mx.symbol.Softmax(data=fc1, name='softmax') + softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') return softmax softmax = inception(21841, 1.0) diff --git a/example/imagenet/inception.py b/example/imagenet/inception.py index 263f3a22733f..a9afe9c01f89 100644 --- a/example/imagenet/inception.py +++ b/example/imagenet/inception.py @@ -73,7 +73,7 @@ def inception(nhidden, grad_scale): # linear classifier 
flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1') - softmax = mx.symbol.Softmax(data=fc1, name='softmax') + softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') return softmax softmax = inception(1000, 1.0) diff --git a/example/memcost/inception_memcost.py b/example/memcost/inception_memcost.py index 8183c6774724..eb9e16908035 100644 --- a/example/memcost/inception_memcost.py +++ b/example/memcost/inception_memcost.py @@ -69,7 +69,7 @@ def inception(nhidden, grad_scale): # linear classifier flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1') - softmax = mx.symbol.Softmax(data=fc1, name='softmax') + softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') return softmax diff --git a/example/mnist/lenet.py b/example/mnist/lenet.py index 40779150ccfb..d8691bbe5867 100644 --- a/example/mnist/lenet.py +++ b/example/mnist/lenet.py @@ -23,7 +23,7 @@ # second fullc fc2 = mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # loss -lenet = mx.symbol.Softmax(data=fc2) +lenet = mx.symbol.SoftmaxOutput(data=fc2) ## data train, val = mnist_iterator(batch_size=100, input_shape=(1,28,28)) diff --git a/example/mnist/mlp.py b/example/mnist/mlp.py index 0cfffe55cbe4..2bfa55d913ba 100644 --- a/example/mnist/mlp.py +++ b/example/mnist/mlp.py @@ -11,7 +11,7 @@ fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64) act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10) -mlp = mx.symbol.Softmax(data = fc3, name = 'mlp') +mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp') # data diff --git a/example/mnist/mlp_numpy.py b/example/mnist/mlp_numpy.py index 114a6bf257d5..538aa87c7c23 100644 --- a/example/mnist/mlp_numpy.py +++ b/example/mnist/mlp_numpy.py @@ -11,7 +11,7 @@ fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64) act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10) -mlp = mx.symbol.Softmax(data = fc3, name = 'mlp') +mlp = mx.symbol.SoftmaxOutput(data = fc3, name = 'mlp') # data diff --git a/example/notebooks/cifar-100.ipynb b/example/notebooks/cifar-100.ipynb index 8e8c53a2d75b..bb5cb1b81624 100644 --- a/example/notebooks/cifar-100.ipynb +++ b/example/notebooks/cifar-100.ipynb @@ -131,7 +131,7 @@ " # linear classifier\n", " flatten = mx.symbol.Flatten(data=avg, name='flatten')\n", " fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc')\n", - " softmax = mx.symbol.Softmax(data=fc1, name='softmax')\n", + " softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')\n", " return softmax\n", "\n", "softmax = inception(100, 1.0)" diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index eae38dab736c..7c436554fa47 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -127,7 +127,7 @@ "pool = mx.symbol.Pooling(data=in5b, pool_type=\"avg\", kernel=(7,7), name=\"global_avg\")\n", "flatten = mx.symbol.Flatten(data=pool)\n", "fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10)\n", - "softmax = mx.symbol.Softmax(data=fc)" + "softmax = mx.symbol.SoftmaxOutput(data=fc)" ] }, { diff --git a/example/notebooks/composite_symbol.ipynb b/example/notebooks/composite_symbol.ipynb index 22966f5fd3f5..1d2cdaec764d 100644 --- 
a/example/notebooks/composite_symbol.ipynb +++ b/example/notebooks/composite_symbol.ipynb @@ -3691,7 +3691,7 @@ "\n", "softmax0\n", "\n", - "Softmax\n", + "SoftmaxOutput\n", "\n", "\n", "softmax0->fullyconnected0\n", @@ -3739,7 +3739,7 @@ "# linear classifier\n", "flatten = mx.symbol.Flatten(data=avg)\n", "fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=1000)\n", - "softmax = mx.symbol.Softmax(data=fc1)\n", + "softmax = mx.symbol.SoftmaxOutput(data=fc1)\n", "\n", "# if you like, you can visualize full network structure\n", "mx.viz.plot_network(symbol=softmax, shape={\"data\" : (128, 3, 224, 224)})" diff --git a/example/numpy-ops/data.py b/example/numpy-ops/data.py new file mode 100644 index 000000000000..d39821f52145 --- /dev/null +++ b/example/numpy-ops/data.py @@ -0,0 +1,32 @@ +# pylint: skip-file +""" data iterator for mnist """ +import sys +import os +# code to automatically download dataset +curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) +sys.path.append(os.path.join(curr_path, "../../tests/python/common")) +import get_data +import mxnet as mx + +def mnist_iterator(batch_size, input_shape): + """return train and val iterators for mnist""" + # download data + get_data.GetMNIST_ubyte() + flat = False if len(input_shape) == 3 else True + + train_dataiter = mx.io.MNISTIter( + image="data/train-images-idx3-ubyte", + label="data/train-labels-idx1-ubyte", + input_shape=input_shape, + batch_size=batch_size, + shuffle=True, + flat=flat) + + val_dataiter = mx.io.MNISTIter( + image="data/t10k-images-idx3-ubyte", + label="data/t10k-labels-idx1-ubyte", + input_shape=input_shape, + batch_size=batch_size, + flat=flat) + + return (train_dataiter, val_dataiter) diff --git a/example/numpy-ops/numpy_softmax.py b/example/numpy-ops/numpy_softmax.py new file mode 100644 index 000000000000..1ea5f0051061 --- /dev/null +++ b/example/numpy-ops/numpy_softmax.py @@ -0,0 +1,60 @@ +# pylint: skip-file +from data import mnist_iterator +import mxnet as mx +import numpy as np +import logging + + +class NumpySoftmax(mx.operator.NumpyOp): + def need_top_grad(self): + return False + + def list_arguments(self): + return ['data', 'label'] + + def list_outputs(self): + return ['prob'] + + def infer_shape(self, in_shape): + return [in_shape[0], (in_shape[0][0],)], [in_shape[0]] + + def forward(self, in_data, out_data): + x = in_data[0] + y = out_data[0] + y[:] = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1))) + y /= y.sum(axis=1).reshape((x.shape[0], 1)) + + def backward(self, out_grad, in_data, out_data, in_grad): + l = in_data[1] + l = l.reshape((l.size,)).astype(np.int) + y = out_data[0] + dx = in_grad[0] + dx[:] = y + dx[np.arange(l.shape[0]), l] -= 1.0 + +# define mlp + +data = mx.symbol.Variable('data') +fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128) +act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") +fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64) +act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") +fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=10) +#mlp = mx.symbol.Softmax(data = fc3, name = 'mlp') +mysoftmax = NumpySoftmax() +mlp = mysoftmax(data=fc3, name = 'mlp') + +# data + +train, val = mnist_iterator(batch_size=100, input_shape = (784,)) + +# train + +logging.basicConfig(level=logging.DEBUG) + +model = mx.model.FeedForward( + ctx = mx.gpu(), symbol = mlp, num_epoch = 20, + learning_rate = 0.1, momentum = 0.9, wd = 0.00001) + +model.fit(X=train, eval_data=val) 
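[Editor's note] The `backward` method in `numpy_softmax.py` above leans on the standard softmax-plus-cross-entropy identity: for loss L = -log y[label] with y = softmax(x), the gradient is dL/dx = y - onehot(label), which is why the code copies y into dx and subtracts 1 at the label position. A quick, self-contained NumPy check of that identity, independent of mxnet:

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

rng = np.random.RandomState(0)
x, label, eps = rng.randn(1, 5), 2, 1e-6

analytic = softmax(x).copy()   # dL/dx = y - onehot(label)
analytic[0, label] -= 1.0

numeric = np.zeros_like(x)     # central finite differences
for j in range(x.shape[1]):
    xp, xm = x.copy(), x.copy()
    xp[0, j] += eps
    xm[0, j] -= eps
    numeric[0, j] = (-np.log(softmax(xp)[0, label])
                     + np.log(softmax(xm)[0, label])) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-4)
```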
+ diff --git a/example/python-howto/multiple_outputs.py b/example/python-howto/multiple_outputs.py index ab6d6d12356c..97ce469d58a2 100644 --- a/example/python-howto/multiple_outputs.py +++ b/example/python-howto/multiple_outputs.py @@ -8,7 +8,7 @@ fc1 = mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) net = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu") net = mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) -out = mx.symbol.Softmax(data=net, name='softmax') +out = mx.symbol.SoftmaxOutput(data=net, name='softmax') # group fc1 and out together group = mx.symbol.Group([fc1, out]) print group.list_outputs() diff --git a/example/rnn/lstm.py b/example/rnn/lstm.py index 25245aad18ee..4b5706ff7208 100644 --- a/example/rnn/lstm.py +++ b/example/rnn/lstm.py @@ -83,7 +83,7 @@ def lstm_unroll(num_lstm_layer, seq_len, bias=cls_bias, num_hidden=num_label, name="t%d_cls" % seqidx) - sm = mx.sym.Softmax(data=fc, label=label, name="t%d_sm" % seqidx) + sm = mx.sym.SoftmaxOutput(data=fc, label=label, name="t%d_sm" % seqidx) out_prob.append(sm) for i in range(num_lstm_layer): @@ -216,7 +216,7 @@ def train_lstm(model, X_train_batch, X_val_batch, set_rnn_inputs(m, X_train_batch, begin=begin) m.rnn_exec.forward(is_train=True) # probability of each label class, used to evaluate nll - seq_label_probs = [mx.nd.choose_element(out, label).copyto(mx.cpu()) + seq_label_probs = [mx.nd.choose_element_0index(out, label).copyto(mx.cpu()) for out, label in zip(m.seq_outputs, m.seq_labels)] m.rnn_exec.backward() # transfer the states @@ -251,7 +251,7 @@ def train_lstm(model, X_train_batch, X_val_batch, set_rnn_inputs(m, X_val_batch, begin=begin) m.rnn_exec.forward(is_train=False) # probability of each label class, used to evaluate nll - seq_label_probs = [mx.nd.choose_element(out, label).copyto(mx.cpu()) + seq_label_probs = [mx.nd.choose_element_0index(out, label).copyto(mx.cpu()) for out, label in zip(m.seq_outputs, m.seq_labels)] # transfer the states for init, last in zip(m.init_states, m.last_states): diff --git a/include/mxnet/base.h b/include/mxnet/base.h index 1eeffc1ab4b9..52100cdf05ea 100644 --- a/include/mxnet/base.h +++ b/include/mxnet/base.h @@ -50,8 +50,8 @@ #endif /*! -* \brief define dllexport for Visual Studio -*/ + * \brief define dllexport for Visual Studio + */ #ifdef _MSC_VER #ifdef MXNET_EXPORTS #define MXNET_API __declspec(dllexport) @@ -62,6 +62,14 @@ #define MXNET_API #endif +/*! + * \brief define prediction only + */ +#ifndef MXNET_PREDICT_ONLY +#define MXNET_PREDICT_ONLY 0 +#endif + + /*! \brief namespace of mxnet */ namespace mxnet { /*! \brief mxnet cpu */ diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 2bbda3ddbf0e..84754977ba3e 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -48,6 +48,16 @@ typedef void *DataIterHandle; typedef void *KVStoreHandle; /*! \brief handle to RecordIO */ typedef void *RecordIOHandle; + +MXNET_EXTERN_C { +struct NativeOpInfo { + void (*forward)(int, float**, int*, unsigned**, int*); + void (*backward)(int, float**, int*, unsigned**, int*); + void (*infer_shape)(int, int*, unsigned**); + void (*list_outputs)(char***); + void (*list_arguments)(char***); +}; +} /*! 
* \brief return str message of the last error * all function in this file will return 0 when success diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h index e5671da33cbc..9163a6c3e910 100644 --- a/include/mxnet/c_predict_api.h +++ b/include/mxnet/c_predict_api.h @@ -9,6 +9,8 @@ #ifdef __cplusplus #define MXNET_EXTERN_C extern "C" +#else +#define MXNET_EXTERN_C #endif #ifdef _WIN32 diff --git a/mshadow b/mshadow index 27ba6a635e81..74be312ab6f2 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit 27ba6a635e81ac6e9f0f30a1ab1bf1d32e56f7d8 +Subproject commit 74be312ab6f20178766901a7caf021d4829e9110 diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index a036e003ba77..abd842748a57 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -13,6 +13,7 @@ from . import symbol from . import io from . import recordio +from . import operator # use mx.nd as short for mx.ndarray from . import ndarray as nd # use mx.rnd as short for mx.random diff --git a/python/mxnet/base.py b/python/mxnet/base.py index d6aec6509b85..4ae621a86c7f 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -105,7 +105,6 @@ def c_array(ctype, values): """ return (ctype * len(values))(*values) - def ctypes2buffer(cptr, length): """Convert ctypes pointer to buffer type. @@ -197,7 +196,6 @@ def ctypes2docstring(num_args, arg_names, arg_types, arg_descs, remove_dup=True) doc_str = doc_str % ('\n'.join(param_str)) return doc_str - def _notify_shutdown(): """Notify MXNet about a shutdown.""" check_call(_LIB.MXNotifyShutdown()) diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py index dde8f40db9c5..8d08e40ba7d3 100644 --- a/python/mxnet/callback.py +++ b/python/mxnet/callback.py @@ -1,5 +1,5 @@ # coding: utf-8 -"""Callback functions that can be used to track various status during iteration.""" +"""Callback functions that can be used to track various status during each epoch.""" from __future__ import absolute_import import sys @@ -9,7 +9,7 @@ from .model import save_checkpoint def do_checkpoint(prefix): - """Callback to checkpoint the model to prefix every iteration. + """Callback to checkpoint the model to prefix every epoch. Parameters ---------- @@ -45,7 +45,7 @@ def _callback(param): if param.nbatch % period == 0: name, value = param.eval_metric.get() logging.info('Iter[%d] Batch[%d] Train-%s=%f', - param.iteration, param.nbatch, name, value) + param.epoch, param.nbatch, name, value) return _callback @@ -77,7 +77,7 @@ def __call__(self, param): if count % self.frequent == 0: speed = self.frequent * self.batch_size / (time.time() - self.tic) logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec", - param.iteration, count, speed) + param.epoch, count, speed) self.tic = time.time() else: self.init = True diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py new file mode 100644 index 000000000000..1cdd4277d894 --- /dev/null +++ b/python/mxnet/operator.py @@ -0,0 +1,186 @@ +# coding: utf-8 +# pylint: disable=invalid-name, protected-access, too-many-arguments, no-self-use +"""numpy interface for operators.""" +from __future__ import absolute_import + +from ctypes import CFUNCTYPE, POINTER, Structure, pointer, c_void_p, cast, c_int, c_char, c_char_p +from .base import c_array, c_str, mx_uint, mx_float, ctypes2numpy_shared +from . import symbol + +class NumpyOp(object): + """Base class for numpy operators. numpy operators allow parts + of computation in symbolic graph to be written in numpy.
This feature + is intended for quickly hacking out a solution for non-performance-critical + parts. Please consider writing a C++ implementation if it becomes + a bottleneck. + """ + def __init__(self): + self.info_ = None + + def __call__(self, *args, **kwargs): + return self.get_symbol(*args, **kwargs) + + def get_symbol(self, *args, **kwargs): + """Create a symbol from numpy operator. + This should only be called once per instance if the operator contains + internal states. + + Parameters + ---------- + args : list + a list of input arguments (symbols) + + Returns + ------- + sym : mxnet.symbol.Symbol + """ + fb_functype = CFUNCTYPE(None, c_int, POINTER(POINTER(mx_float)), POINTER(c_int), + POINTER(POINTER(mx_uint)), POINTER(c_int)) + infer_functype = CFUNCTYPE(None, c_int, POINTER(c_int), POINTER(POINTER(mx_uint))) + list_functype = CFUNCTYPE(None, POINTER(POINTER(POINTER(c_char)))) + class NumpyOpInfo(Structure): + """Structure that holds Callback information. Passed to NumpyOpProp""" + _fields_ = [ + ('forward', fb_functype), + ('backward', fb_functype), + ('infer_shape', infer_functype), + ('list_outputs', list_functype), + ('list_arguments', list_functype) + ] + def forward_entry(num_tensor, tensor_ptrs, tensor_dims, + tensor_shapes, tensor_tags): + """C Callback for NumpyOp::Forward""" + tensors = [[] for i in range(4)] + for i in range(num_tensor): + shape = [tensor_shapes[i][j] for j in range(tensor_dims[i])] + buff = ctypes2numpy_shared(tensor_ptrs[i], shape) + tensors[tensor_tags[i]].append(buff) + self.forward(in_data=tensors[0], out_data=tensors[1]) + + def backward_entry(num_tensor, tensor_ptrs, tensor_dims, + tensor_shapes, tensor_tags): + """C Callback for NumpyOp::Backward""" + tensors = [[] for i in range(4)] + for i in range(num_tensor): + shape = [tensor_shapes[i][j] for j in range(tensor_dims[i])] + buff = ctypes2numpy_shared(tensor_ptrs[i], shape) + tensors[tensor_tags[i]].append(buff) + self.backward(in_data=tensors[0], out_data=tensors[1], + in_grad=tensors[2], out_grad=tensors[3]) + + def infer_shape_entry(num_tensor, tensor_dims, + tensor_shapes): + """C Callback for NumpyOpProp::InferShape""" + n_in = len(self.list_arguments()) + n_out = len(self.list_outputs()) + assert num_tensor == n_in + n_out + + shapes = [[tensor_shapes[i][j] for j in range(tensor_dims[i])] for i in range(n_in)] + ishape, oshape = self.infer_shape(shapes) + assert len(oshape) == n_out + assert len(ishape) == n_in + rshape = list(ishape) + list(oshape) + for i in range(n_in+n_out): + tensor_shapes[i] = cast(c_array(mx_uint, rshape[i]), POINTER(mx_uint)) + tensor_dims[i] = len(rshape[i]) + + def list_outputs_entry(out): + """C Callback for NumpyOpProp::ListOutputs""" + ret = self.list_outputs() + ret = [c_str(i) for i in ret] + [c_char_p(0)] + ret = c_array(c_char_p, ret) + out[0] = cast(ret, POINTER(POINTER(c_char))) + + def list_arguments_entry(out): + """C Callback for NumpyOpProp::ListArguments""" + ret = self.list_arguments() + ret = [c_str(i) for i in ret] + [c_char_p(0)] + ret = c_array(c_char_p, ret) + out[0] = cast(ret, POINTER(POINTER(c_char))) + + + self.info_ = NumpyOpInfo(fb_functype(forward_entry), + fb_functype(backward_entry), + infer_functype(infer_shape_entry), + list_functype(list_outputs_entry), + list_functype(list_arguments_entry)) + cb_ptr = hex(cast(pointer(self.info_), c_void_p).value) + # pylint: disable=E1101 + return symbol.Symbol._Native(*args, + info=cb_ptr, + need_top_grad=self.need_top_grad(), + **kwargs) + + def forward(self, in_data, out_data): + """forward
interface. Override to create new operators. + + Parameters + ---------- + in_data, out_data: list + input and output for forward. See document for + corresponding arguments of Operator::Forward + """ + out_data[0][:] = in_data[0] + + def backward(self, out_grad, in_data, out_data, in_grad): + """backward interface. Override to create new operators. + + Parameters + ---------- + out_grad, in_data, out_data, in_grad : list + input and output for backward. See document for + corresponding arguments of Operator::Backward + """ + # pylint: disable=W0613 + in_grad[0][:] = 1.0 + + def infer_shape(self, in_shape): + """infer_shape interface. Override to create new operators. + + Parameters + ---------- + in_shape : list + list of argument shapes in the same order as + declared in list_arguments. + + Returns + ------- + in_shape : list + list of argument shapes. Can be modified from in_shape. + out_shape : list + list of output shapes calculated from in_shape, + in the same order as declared in list_outputs. + """ + return in_shape, [in_shape[0]] + + def list_outputs(self): + """list_outputs interface. Override to create new operators. + + Returns + ------- + outputs : list + list of output blob names. + """ + return ['output'] + + def list_arguments(self): + """list_arguments interface. Override to create new operators. + + Returns + ------- + arguments : list + list of argument blob names. + """ + return ['data'] + + def need_top_grad(self): + """Whether this operator needs out_grad for backward. + + Returns + ------- + need_top_grad : bool + Whether this operator needs out_grad for backward. + Should be set to False for loss layers. + """ + return True diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index aaac3ee61d08..8706ac1cc86c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -972,8 +972,9 @@ int MXKVStoreIsSchedulerNode(int *ret) { int MXKVStoreRunServer(KVStoreHandle handle, MXKVStoreServerController controller) { API_BEGIN(); - auto ctrl = [controller](int head, const std::string& body) { - controller(head, body.c_str()); + MXKVStoreServerController *controller_temp = controller; + auto ctrl = [controller_temp](int head, const std::string& body) { + controller_temp(head, body.c_str()); }; static_cast<KVStore*>(handle)->RunServer(ctrl); API_END(); diff --git a/src/common/tblob_op_registry.cc b/src/common/tblob_op_registry.cc index 8dac8944f144..064cc4b1cc6f 100644 --- a/src/common/tblob_op_registry.cc +++ b/src/common/tblob_op_registry.cc @@ -252,7 +252,7 @@ class TBlobUnaryOpProp : public OperatorProperty { } } - Operator* CreateOperator(Context ctx) const { + Operator* CreateOperator(Context ctx) const override { size_t dev_mask = ctx.dev_mask(); TBlobUnaryOperator *op = new TBlobUnaryOperator(); CHECK(dev_mask < source->funary_.size() && source->funary_[dev_mask] != nullptr); diff --git a/src/engine/engine.cc b/src/engine/engine.cc index eececfa91e04..ae72861260e1 100644 --- a/src/engine/engine.cc +++ b/src/engine/engine.cc @@ -17,6 +17,7 @@ inline Engine* CreateEngine() { std::string stype = type; Engine *ret = nullptr; + #if MXNET_PREDICT_ONLY == 0 if (stype == "NaiveEngine") { ret = CreateNaiveEngine(); } else if (stype == "ThreadedEngine") { @@ -24,6 +25,9 @@ } else if (stype == "ThreadedEnginePerDevice") { ret = CreateThreadedEnginePerDevice(); } + #else + ret = CreateNaiveEngine(); + #endif if (ret ==nullptr) { LOG(FATAL) << "Cannot find Engine " << type; diff --git a/src/engine/engine_impl.h
b/src/engine/engine_impl.h index 44452df7b9c5..9d3fc4cd09f7 100644 --- a/src/engine/engine_impl.h +++ b/src/engine/engine_impl.h @@ -71,10 +71,12 @@ static constexpr std::size_t kMaxNumGPUs = 16; // predeclare factory function for each type of engine /*! \return NaiveEngine instance */ Engine *CreateNaiveEngine(); +#if MXNET_PREDICT_ONLY == 0 /*! \return ThreadedEnginePooled instance */ Engine *CreateThreadedEnginePooled(); /*! \return ThreadedEnginePerDevie instance */ Engine *CreateThreadedEnginePerDevice(); +#endif } // namespace engine } // namespace mxnet #endif // MXNET_ENGINE_ENGINE_IMPL_H_ diff --git a/src/kvstore/kvstore_dist_server.h b/src/kvstore/kvstore_dist_server.h index d25d6d95d989..0bf446d50ca5 100644 --- a/src/kvstore/kvstore_dist_server.h +++ b/src/kvstore/kvstore_dist_server.h @@ -39,9 +39,9 @@ class Executor { lk.unlock(); if (blk.f) { - blk.f(); blk.p.set_value(); + blk.f(); blk.p->set_value(); } else { - blk.p.set_value(); break; + blk.p->set_value(); break; } lk.lock(); } @@ -57,7 +57,7 @@ class Executor { */ void Exec(const Func& func) { Block blk(func); - auto fut = blk.p.get_future(); + auto fut = blk.p->get_future(); { std::lock_guard lk(mu_); queue_.push(std::move(blk)); @@ -75,9 +75,9 @@ class Executor { private: struct Block { - explicit Block(const Func& func) : f(func) { } + explicit Block(const Func& func) : f(func), p(std::make_shared>()) { } Func f; - std::promise p; + std::shared_ptr> p; }; std::queue queue_; std::mutex mu_; diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 404c0891f984..9ea7321195c0 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -595,6 +595,7 @@ void NDArray::SyncCopyToCPU(real_t *data, size_t size) const { } } +#if MXNET_PREDICT_ONLY == 0 // register API function // those with underscore will be registered at NDArray MXNET_REGISTER_NDARRAY_FUN(_set_value).set_function(SetValueOp); @@ -610,10 +611,11 @@ MXNET_REGISTER_NDARRAY_FUN(dot).set_function(BinaryOp) MXNET_REGISTER_NDARRAY_FUN(_onehot_encode).set_function(BinaryOp); -MXNET_REGISTER_NDARRAY_FUN(choose_element) +MXNET_REGISTER_NDARRAY_FUN(choose_element_0index) .set_function(BinaryOp) .describe("Choose one element from each line(row for python, column for R/Julia)" - " in lhs according to index indicated by rhs"); + " in lhs according to index indicated by rhs." 
+ " This function assumes rhs uses 0-based indexing."); // register API function // those with underscore will be registered at NDArray @@ -659,4 +661,5 @@ MXNET_REGISTER_NDARRAY_FUN(clip) .add_argument("src", "NDArray", "Source input") .add_argument("a_min", "real_t", "Minimum value") .add_argument("a_max", "real_t", "Maximum value"); +#endif } // namespace mxnet diff --git a/src/operator/activation-inl.h b/src/operator/activation-inl.h index 6280c1664e84..cca8d7824697 100644 --- a/src/operator/activation-inl.h +++ b/src/operator/activation-inl.h @@ -144,7 +144,7 @@ class ActivationProp : public OperatorProperty { return {{in_data[activation::kData], out_data[activation::kOut]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: ActivationParam param_; diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h index f031058f899e..8ae6d30a50bb 100644 --- a/src/operator/batch_norm-inl.h +++ b/src/operator/batch_norm-inl.h @@ -273,7 +273,7 @@ class BatchNormProp : public OperatorProperty { return {"moving_mean", "moving_var"}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: BatchNormParam param_; diff --git a/src/operator/block_grad-inl.h b/src/operator/block_grad-inl.h index 012dc7a2da63..ff5262d4e04a 100644 --- a/src/operator/block_grad-inl.h +++ b/src/operator/block_grad-inl.h @@ -102,7 +102,7 @@ class BlockGradientProp : public OperatorProperty { return {{in_data[blockgrad::kData], out_data[blockgrad::kOut]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; }; // class BlockGradientProperty #endif // DMLC_USE_CXX11 diff --git a/src/operator/channel_op_common.h b/src/operator/channel_op_common.h index cd8b972a9792..72609ba51b96 100644 --- a/src/operator/channel_op_common.h +++ b/src/operator/channel_op_common.h @@ -14,41 +14,46 @@ namespace mxnet { namespace op { -using mshadow::expr::concat; - - template inline void Concatenate(const std::vector > &input, mshadow::Tensor *output) { + using mshadow::expr::concat; + using mshadow::expr::slice; mshadow::Tensor out = *output; size_t size = input.size(); switch (size) { - case 2: + case 2: { out = concat<1>(input[0], input[1]); break; - case 3: + } + case 3: { out = concat<1>(input[0], concat<1>(input[1], input[2])); break; - case 4: + } + case 4: { out = concat<1>(input[0], concat<1>(input[1], concat<1>(input[2], input[3]))); break; - case 5: - out = concat<1>(input[0], - concat<1>(input[1], - concat<1>(input[2], - concat<1>(input[3], input[4])))); + } + default: { + index_t begin = 0; + for (index_t i = 0; i < size; ++i) { + index_t end = begin + input[i].size(1); + slice<1>(out, begin, end) = input[i]; + begin = end; + } break; - default: - LOG(FATAL) << "Incorrect concat size: " << size; + } } } template void Split(const mshadow::Tensor &input, std::vector > *output) { + using mshadow::expr::concat; + using mshadow::expr::slice; std::vector > out = *output; size_t size = out.size(); switch (size) { @@ -67,15 +72,15 @@ void Split(const mshadow::Tensor &input, concat<1>(out[2], out[3]))) = input; break; } - case 5: { - concat<1>(out[0], - concat<1>(out[1], - concat<1>(out[2], - concat<1>(out[3], out[4])))) = input; + default: { + index_t begin = 0; + for (index_t i = 0; i < size; ++i) { + index_t end = begin + out[i].size(1); + out[i] = slice<1>(input, begin, end); + begin = end; + } break; } - default: - LOG(FATAL) << "Incorrect concat size: "
<< size; } } } // namespace op diff --git a/src/operator/concat-inl.h b/src/operator/concat-inl.h index 3e9c812603e3..13c53e7c2246 100644 --- a/src/operator/concat-inl.h +++ b/src/operator/concat-inl.h @@ -28,7 +28,7 @@ enum ConcatOpOutputs {kOut}; struct ConcatParam : public dmlc::Parameter { int num_args; DMLC_DECLARE_PARAMETER(ConcatParam) { - DMLC_DECLARE_FIELD(num_args).set_range(1, 6) + DMLC_DECLARE_FIELD(num_args).set_lower_bound(1) .describe("Number of inputs to be concated."); } }; // struct ConcatParam @@ -175,7 +175,7 @@ class ConcatProp : public OperatorProperty { return out_grad; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: ConcatParam param_; diff --git a/src/operator/convolution-inl.h b/src/operator/convolution-inl.h index 29a9288b2870..6a6c8590daf2 100644 --- a/src/operator/convolution-inl.h +++ b/src/operator/convolution-inl.h @@ -46,7 +46,9 @@ struct ConvolutionParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("convolution filter(channel) number"); DMLC_DECLARE_FIELD(num_group).set_default(1) - .describe("number of groups partition"); + .describe("Number of group partitions. " + "This option is not supported by CuDNN; you can achieve the same effect by " + "using SliceChannel to split into num_group groups, applying convolution to each, and concatenating the results."); DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(128, 4096) .describe("Tmp workspace for convolution (MB)"); DMLC_DECLARE_FIELD(no_bias).set_default(false) @@ -347,7 +349,7 @@ class ConvolutionProp : public OperatorProperty { return {ResourceRequest::kTempSpace}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: ConvolutionParam param_; diff --git a/src/operator/cudnn_deconvolution-inl.h b/src/operator/cudnn_deconvolution-inl.h new file mode 100644 index 000000000000..5b6dc4b7b8c8 --- /dev/null +++ b/src/operator/cudnn_deconvolution-inl.h @@ -0,0 +1,280 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file cudnn_deconvolution-inl.h + * \brief + * \author Wei Wu +*/ +#ifndef MXNET_OPERATOR_CUDNN_DECONVOLUTION_INL_H_ +#define MXNET_OPERATOR_CUDNN_DECONVOLUTION_INL_H_ + +#include +#include +#include "./deconvolution-inl.h" + +namespace mxnet { +namespace op { +#if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 +class CuDNNDeconvolutionOp : public Operator { + public: + explicit CuDNNDeconvolutionOp(DeconvolutionParam param) { + this->param_ = param; + // convert MB to words + param_.workspace = (param_.workspace << 20) / sizeof(real_t); + init_cudnn_ = false; + // TODO(xxx): fp16 + dtype_ = CUDNN_DATA_FLOAT; + } + + ~CuDNNDeconvolutionOp() { + if (init_cudnn_) { + CHECK_EQ(cudnnDestroyTensorDescriptor(in_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyTensorDescriptor(out_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyTensorDescriptor(bias_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyFilterDescriptor(filter_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnDestroyConvolutionDescriptor(conv_desc_), CUDNN_STATUS_SUCCESS); + } + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_args) { + using namespace mshadow; + size_t expected = param_.no_bias ?
2 : 3; + float alpha = 1.0f; + float beta = 0.0f; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor data = in_data[deconv::kData].get(s); + Tensor wmat = in_data[deconv::kWeight].get(s); + Tensor out = out_data[deconv::kOut].get(s); + CHECK_EQ(data.CheckContiguous(), true); + CHECK_EQ(wmat.CheckContiguous(), true); + CHECK_EQ(out.CheckContiguous(), true); + if (!init_cudnn_) { + Init(s, in_data, out_data); + } + Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( + mshadow::Shape1(forward_workspace_), s); + CHECK_EQ(cudnnConvolutionBackwardData_v3(s->dnn_handle_, + &alpha, + filter_desc_, + wmat.dptr_, + in_desc_, + data.dptr_, + conv_desc_, + back_algo_, + workspace.dptr_, + backward_workspace_byte_, + &beta, + out_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); + if (!param_.no_bias) { + beta = 1.0f; + Tensor bias = in_data[deconv::kBias].get(s); + CHECK_EQ(cudnnAddTensor(s->dnn_handle_, + CUDNN_ADD_SAME_C, + &alpha, + bias_desc_, + bias.dptr_, + &beta, + out_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + float alpha = 1.0f; + float beta = 0.0f; + size_t expected = param_.no_bias == 0 ? 3 : 2; + CHECK_EQ(out_grad.size(), 1); + CHECK(in_data.size() == expected && in_grad.size() == expected); + // TODO(bing): think about how to support add to + CHECK_EQ(req[deconv::kWeight], kWriteTo); + Stream *s = ctx.get_stream(); + Tensor grad = out_grad[deconv::kOut].get(s); + Tensor wmat = in_data[deconv::kWeight].get(s); + Tensor gwmat = in_grad[deconv::kWeight].get(s); + Tensor data = in_data[deconv::kData].get(s); + Tensor gdata = in_grad[deconv::kData].get(s); + Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( + mshadow::Shape1(backward_workspace_), s); + if (!param_.no_bias) { + Tensor gbias = in_grad[deconv::kBias].get(s); + CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_, + &alpha, + out_desc_, + grad.dptr_, + &beta, + bias_desc_, + gbias.dptr_), CUDNN_STATUS_SUCCESS); + } + CHECK_EQ(cudnnConvolutionBackwardFilter_v3(s->dnn_handle_, + &alpha, + out_desc_, + grad.dptr_, + in_desc_, + data.dptr_, + conv_desc_, + back_algo_w_, + workspace.dptr_, + backward_workspace_byte_, + &beta, + filter_desc_, + gwmat.dptr_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_, + &alpha, + out_desc_, + grad.dptr_, + filter_desc_, + wmat.dptr_, + conv_desc_, + algo_, + workspace.dptr_, + forward_workspace_byte_, + &beta, + in_desc_, + gdata.dptr_), CUDNN_STATUS_SUCCESS); + } + + private: + inline void Init(mshadow::Stream *s, + const std::vector &in_data, + const std::vector &out_data) { + using namespace mshadow; + size_t expected = param_.no_bias ? 
2 : 3; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1); + if (!init_cudnn_) { + init_cudnn_ = true; + size_t workspace_byte = static_cast(param_.workspace * sizeof(real_t)); + size_t back_size = 0; + size_t back_size_w = 0; + Tensor data = in_data[deconv::kData].get(s); + Tensor out = out_data[deconv::kOut].get(s); + CHECK_EQ(cudnnCreateTensorDescriptor(&in_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnCreateTensorDescriptor(&out_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnCreateTensorDescriptor(&bias_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnCreateFilterDescriptor(&filter_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnCreateConvolutionDescriptor(&conv_desc_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetFilter4dDescriptor(filter_desc_, + dtype_, + data.shape_[1], + param_.num_filter, + param_.kernel[0], + param_.kernel[1]), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetConvolution2dDescriptor(conv_desc_, + param_.pad[0], + param_.pad[1], + param_.stride[0], + param_.stride[1], + 1, + 1, + CUDNN_CROSS_CORRELATION), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetTensor4dDescriptor(in_desc_, + CUDNN_TENSOR_NCHW, + dtype_, + data.shape_[0], + data.shape_[1], + data.shape_[2], + data.shape_[3]), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnSetTensor4dDescriptor(out_desc_, + CUDNN_TENSOR_NCHW, + dtype_, + out.shape_[0], + out.shape_[1], + out.shape_[2], + out.shape_[3]), CUDNN_STATUS_SUCCESS); + if (!param_.no_bias) { + Tensor bias = in_data[deconv::kBias].get(s); + CHECK_EQ(cudnnSetTensor4dDescriptor(bias_desc_, + CUDNN_TENSOR_NCHW, + dtype_, + 1, + bias.shape_[0], + 1, + 1), CUDNN_STATUS_SUCCESS); + } + CHECK_EQ(s->dnn_handle_ownership_, mshadow::Stream::OwnHandle); + CHECK_EQ(cudnnGetConvolutionForwardAlgorithm(s->dnn_handle_, + out_desc_, + filter_desc_, + conv_desc_, + in_desc_, + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, + workspace_byte, + &algo_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnGetConvolutionBackwardFilterAlgorithm(s->dnn_handle_, + out_desc_, + in_desc_, + conv_desc_, + filter_desc_, + CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, + workspace_byte, + &back_algo_w_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnGetConvolutionBackwardDataAlgorithm(s->dnn_handle_, + filter_desc_, + in_desc_, + conv_desc_, + out_desc_, + CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, + workspace_byte, + &back_algo_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_, + filter_desc_, + in_desc_, + conv_desc_, + out_desc_, + back_algo_, + &back_size), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnGetConvolutionBackwardFilterWorkspaceSize(s->dnn_handle_, + out_desc_, + in_desc_, + conv_desc_, + filter_desc_, + back_algo_w_, + &back_size_w), CUDNN_STATUS_SUCCESS); + backward_workspace_byte_ = std::max(back_size, back_size_w); + CHECK_EQ(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_, + out_desc_, + filter_desc_, + conv_desc_, + in_desc_, + algo_, + &forward_workspace_byte_), CUDNN_STATUS_SUCCESS); + forward_workspace_ = forward_workspace_byte_ / sizeof(real_t) + 1; + backward_workspace_ = backward_workspace_byte_ / sizeof(real_t) + 1; + } + } + + bool init_cudnn_; + size_t forward_workspace_; + size_t backward_workspace_; + size_t forward_workspace_byte_; + size_t backward_workspace_byte_; + cudnnDataType_t dtype_; + cudnnTensorDescriptor_t in_desc_; + cudnnTensorDescriptor_t out_desc_; + cudnnTensorDescriptor_t bias_desc_; + cudnnFilterDescriptor_t filter_desc_; + cudnnConvolutionDescriptor_t conv_desc_; + cudnnConvolutionFwdAlgo_t algo_; + 
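A note on the three algorithm members declared here: this cuDNN path realizes deconvolution by reusing cuDNN's convolution kernels with the data and gradient roles swapped, so Forward runs cudnnConvolutionBackwardData, the weight gradient uses cudnnConvolutionBackwardFilter, and the data gradient uses cudnnConvolutionForward. A plain-Python sketch of the output geometry this implies, matching InferShape in deconvolution-inl.h below (the function name is illustrative):

```python
def deconv_out_dim(in_dim, kernel, stride, pad):
    # Deconvolution inverts the convolution shape rule:
    #   conv:   out = (in + 2*pad - kernel) // stride + 1
    #   deconv: out = stride * (in - 1) + kernel - 2*pad
    return stride * (in_dim - 1) + kernel - 2 * pad

# e.g. a 4x4 map upsampled with kernel=3, stride=2, pad=1 gives 7x7
assert deconv_out_dim(4, kernel=3, stride=2, pad=1) == 7
```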
cudnnConvolutionBwdDataAlgo_t back_algo_; + cudnnConvolutionBwdFilterAlgo_t back_algo_w_; + DeconvolutionParam param_; +}; +#endif // __CUDACC__ && CUDNN +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_CUDNN_DECONVOLUTION_INL_H_ diff --git a/src/operator/deconvolution-inl.h b/src/operator/deconvolution-inl.h new file mode 100644 index 000000000000..c94c56691421 --- /dev/null +++ b/src/operator/deconvolution-inl.h @@ -0,0 +1,361 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file deconvolution-inl.h + * \brief + * \author Wei Wu +*/ +#ifndef MXNET_OPERATOR_DECONVOLUTION_INL_H_ +#define MXNET_OPERATOR_DECONVOLUTION_INL_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + + +namespace mxnet { +namespace op { + +namespace deconv { + enum DeconvolutionOpInputs {kData, kWeight, kBias}; + enum DeconvolutionOpOutputs {kOut}; + enum DeconvolutionOpResource {kTempSpace}; +} + +struct DeconvolutionParam : public dmlc::Parameter { + TShape kernel; + TShape stride; + TShape pad; + uint32_t num_filter; + uint32_t num_group; + uint64_t workspace; + bool no_bias; + DMLC_DECLARE_PARAMETER(DeconvolutionParam) { + int shape[] = {1, 1}; + DMLC_DECLARE_FIELD(kernel).describe("deconvolution kernel size: (y, x)"); + DMLC_DECLARE_FIELD(stride).set_default(TShape(shape, shape + 2)) + .describe("deconvolution stride: (y, x)"); + shape[0] = shape[1] = 0; + DMLC_DECLARE_FIELD(pad).set_default(TShape(shape, shape + 2)) + .describe("pad for deconvolution: (y, x)"); + DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) + .describe("deconvolution filter(channel) number"); + DMLC_DECLARE_FIELD(num_group).set_default(1) + .describe("number of groups partition"); + DMLC_DECLARE_FIELD(workspace).set_default(512).set_range(128, 4096) + .describe("Tmp workspace for deconvolution (MB)"); + DMLC_DECLARE_FIELD(no_bias).set_default(true) + .describe("Whether to disable bias parameter."); + } +}; + +template +class DeconvolutionOp : public Operator { + public: + explicit DeconvolutionOp(DeconvolutionParam p) { + this->param_ = p; + // convert MB to words + param_.workspace = (param_.workspace << 20) / sizeof(real_t); + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + CHECK_EQ(req[deconv::kOut], kWriteTo); + size_t expected = param_.no_bias ? 
2 : 3; + CHECK_EQ(in_data.size(), expected); + CHECK_EQ(out_data.size(), 1); + Stream *s = ctx.get_stream(); + Tensor data = in_data[deconv::kData].get(s); + Tensor out = out_data[deconv::kOut].get(s); + Shape<3> wmat_shape = + Shape3(param_.num_group, + data.shape_[1] / param_.num_group, + param_.num_filter / param_.num_group * param_.kernel[0] * param_.kernel[1]); + Tensor wmat = in_data[deconv::kWeight].get_with_shape(wmat_shape, s); +#if defined(__CUDACC__) + CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) + << "Must init CuBLAS handle in stream"; +#endif + const index_t nbatch = data.size(0); + Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( + Shape1(this->InitTemp(out.shape_, data.shape_)), s); + for (index_t i = 0; i < nbatch; i += nstep_) { + const index_t step = std::min(nstep_, nbatch - i); + Tensor temp_col = Tensor(workspace.dptr_, + Shape2(shape_colunit_[0], + shape_colunit_[1] * step), s); + Tensor temp_dst = Tensor(workspace.dptr_ + temp_col.shape_.Size(), + Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * step), s); + temp_dst = reshape(swapaxis<1, 0>(data.Slice(i, i + step)), temp_dst.shape_); + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + temp_col = unpack_patch2col(out.Slice(i, i + step), + param_.kernel[0], + param_.kernel[1], + param_.stride[0], + param_.stride[1]); + } else { + temp_col = unpack_patch2col(pad(out.Slice(i, i + step), + param_.pad[0], param_.pad[1]), + param_.kernel[0], + param_.kernel[1], + param_.stride[0], + param_.stride[1]); + } + const index_t gstride = temp_col.size(0) / param_.num_group; + for (uint32_t gid = 0; gid < param_.num_group; ++gid) { + mshadow::Tensor tmpc = temp_col.Slice(gstride * gid, + gstride * (gid + 1)); + tmpc = dot(wmat[gid].T(), temp_dst[gid]); + } + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + out.Slice(i, i + step) = pack_col2patch(temp_col, + out.Slice(i, i + step).shape_, + param_.kernel[0], + param_.kernel[1], + param_.stride[0]); + } else { + Shape<4> pshape = out.Slice(i, i + step).shape_; + pshape[2] += 2 * param_.pad[0]; + pshape[3] += 2 * param_.pad[1]; + out.Slice(i, i + step) = crop(pack_col2patch(temp_col, + pshape, + param_.kernel[0], + param_.kernel[1], + param_.stride[0]), + out[i][0].shape_); + } + } + if (!param_.no_bias) { + // add bias, broadcast bias to dim 1: channel + Tensor bias = in_data[deconv::kBias].get(s); + out += broadcast<1>(bias, out.shape_); + } + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_args) { + using namespace mshadow; + using namespace mshadow::expr; + // TODO(bing): check the BLAS Handle, be careful + CHECK_EQ(out_grad.size(), 1); + size_t expected = param_.no_bias == 0 ? 
3 : 2; + CHECK(in_data.size() == expected && in_grad.size() == expected); + CHECK_EQ(req.size(), expected); + CHECK_EQ(in_data[deconv::kWeight].CheckContiguous(), true); + // get data + Stream *s = ctx.get_stream(); + Tensor data = in_data[deconv::kData].get(s); + Tensor grad = out_grad[deconv::kOut].get(s); + Tensor gdata = in_grad[deconv::kData].get(s); + Shape<3> wmat_shape = + Shape3(param_.num_group, + data.shape_[1] / param_.num_group, + param_.num_filter / param_.num_group * param_.kernel[0] * param_.kernel[1]); + Tensor wmat = in_data[deconv::kWeight].get_with_shape(wmat_shape, s); + Tensor gwmat = in_grad[deconv::kWeight].get_with_shape(wmat_shape, s); +#if defined(__CUDACC__) + CHECK_EQ(s->blas_handle_ownership_, Stream::OwnHandle) + << "Must init CuBLAS handle in stream"; +#endif + const index_t nbatch = data.size(0); + Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( + Shape1(this->InitTemp(grad.shape_, data.shape_)), s); + for (index_t i = 0; i < nbatch; i += nstep_) { + const index_t step = std::min(nstep_, nbatch - i); + Tensor temp_col = Tensor(workspace.dptr_, + Shape2(shape_colunit_[0], + shape_colunit_[1] * step), s); + Tensor temp_dst = Tensor(workspace.dptr_ + temp_col.shape_.Size(), + Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * step), s); + temp_dst = reshape(swapaxis<1, 0>(data.Slice(i, i + step)), temp_dst.shape_); + if (param_.pad[0] == 0 && param_.pad[1] == 0) { + temp_col = unpack_patch2col(grad.Slice(i, i + step), + param_.kernel[0], + param_.kernel[1], + param_.stride[0], + param_.stride[1]); + } else { + temp_col = unpack_patch2col(pad(grad.Slice(i, i + step), param_.pad[0], param_.pad[1]), + param_.kernel[0], + param_.kernel[1], + param_.stride[0], + param_.stride[1]); + } + const index_t gstride = temp_col.size(0) / param_.num_group; + for (uint32_t gid = 0; gid < param_.num_group; ++gid) { + Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); + if (i == 0) { + Tensor tmp_gwmat = gwmat[gid]; + Assign(tmp_gwmat, req[deconv::kWeight], dot(temp_dst[gid], tmpc.T())); + } else { + gwmat[gid] += dot(temp_dst[gid], tmpc.T()); + } + } + if (req[deconv::kData] == kWriteTo || req[deconv::kData] == kWriteInplace) { + for (uint32_t gid = 0; gid < param_.num_group; ++gid) { + Tensor tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1)); + temp_dst[gid] = dot(wmat[gid], tmpc); + } + gdata.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst, + mshadow::Shape4(gdata.shape_[1], + step, + gdata.size(2), + gdata.size(3)))); + } + } + if (!param_.no_bias) { + Tensor gbias = in_grad[deconv::kBias].get(s); + Assign(gbias, req[deconv::kBias], sumall_except_dim<1>(grad)); + } + } + + private: + inline index_t InitTemp(const mshadow::Shape<4> &ishape, + const mshadow::Shape<4> &oshape) { + const int ksize_y = param_.kernel[0]; + const int ksize_x = param_.kernel[1]; + shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x, + oshape[2] * oshape[3]); + shape_dstunit_ = mshadow::Shape3(param_.num_group, + oshape[1] / param_.num_group, + oshape[2] * oshape[3]); + const uint64_t workspace_size = param_.workspace; + nstep_ = std::max(std::min(static_cast(workspace_size / shape_colunit_.Size()), + ishape[0]), 1U); + int nop = (ishape[0] + nstep_ - 1) / nstep_; + nstep_ = (ishape[0] + nop - 1) / nop; + mshadow::Shape<2> scol = mshadow::Shape2(shape_colunit_[0], + shape_colunit_[1] * nstep_); + mshadow::Shape<3> sdst = mshadow::Shape3(shape_dstunit_[0], + shape_dstunit_[1], + shape_dstunit_[2] * nstep_); + 
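InitTemp above budgets the temporary workspace by batching: it picks the largest per-pass batch step whose unpacked column buffer fits in the configured workspace, then rebalances so every pass over the batch is roughly the same size. A plain-Python sketch of that arithmetic, mirroring the nstep_ computation just above (names are illustrative):

```python
def plan_batch_step(workspace_words, colunit_words, batch):
    # largest step whose column buffer fits the workspace, at least 1
    nstep = max(min(workspace_words // colunit_words, batch), 1)
    # number of passes over the batch at that step size
    npass = (batch + nstep - 1) // nstep
    # even out the passes so the last one is not much smaller
    return (batch + npass - 1) // npass
```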
CHECK_GE(param_.workspace, scol.Size() + sdst.Size()) + << "\nMinimum workspace size: " << scol.Size() + sdst.Size() << "\n" + << "Given: " << param_.workspace; + return scol.Size() + sdst.Size(); + } + + DeconvolutionParam param_; + mshadow::Shape<2> shape_colunit_; + mshadow::Shape<3> shape_dstunit_; + index_t nstep_; +}; // class DeconvolutionOp + +template +Operator* CreateOp(DeconvolutionParam param); + +#if DMLC_USE_CXX11 +class DeconvolutionProp : public OperatorProperty { + public: + std::vector ListArguments() const override { + if (!param_.no_bias) { + return {"data", "weight", "bias"}; + } else { + return {"data", "weight"}; + } + } + + void Init(const std::vector >& kwargs) override { + param_.Init(kwargs); + } + + std::map GetParams() const override { + return param_.__DICT__(); + } + + bool InferShape(std::vector *in_shape, + std::vector *out_shape, + std::vector *aux_shape) const override { + using namespace mshadow; + if (!param_.no_bias) { + CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]"; + } else { + CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]"; + } + const TShape &dshape = (*in_shape)[deconv::kData]; + if (dshape.ndim() == 0) return false; + CHECK_EQ(dshape.ndim(), 4) \ + << "Input data should be 4D in batch-num_filter-y-x"; + SHAPE_ASSIGN_CHECK(*in_shape, + deconv::kWeight, + Shape4(dshape[1], param_.num_filter, param_.kernel[0], param_.kernel[1])); + if (!param_.no_bias) { + SHAPE_ASSIGN_CHECK(*in_shape, deconv::kBias, Shape1(param_.num_filter)); + } + out_shape->clear(); + out_shape->push_back(dshape); + const index_t ksize_y = static_cast(param_.kernel[0]); + const index_t ksize_x = static_cast(param_.kernel[1]); + CHECK_EQ(dshape[1] % param_.num_group, 0) \ + << "input num_filter must divide group size"; + CHECK_EQ(param_.num_filter % param_.num_group, 0) \ + << "output num_filter must divide group size"; + CHECK_GE(param_.kernel.Size(), 0) \ + << "incorrect kernel size: " << param_.kernel; + CHECK_GE(param_.stride.Size(), 0) \ + << "incorrect stride size: " << param_.stride; + CHECK(ksize_x <= dshape[3] && ksize_y <= dshape[2]) + << "kernel size exceed input"; + (*out_shape)[deconv::kOut][1] = param_.num_filter; + (*out_shape)[deconv::kOut][2] = param_.stride[0] * (dshape[2] - 1) + + ksize_y - 2 * param_.pad[0]; + (*out_shape)[deconv::kOut][3] = param_.stride[1] * (dshape[3] - 1) + + ksize_x - 2 * param_.pad[1]; + return true; + } + + OperatorProperty* Copy() const override { + auto ptr = new DeconvolutionProp(); + ptr->param_ = param_; + return ptr; + } + + std::string TypeString() const override { + return "Deconvolution"; + } + + std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { + return {out_grad[deconv::kOut], in_data[deconv::kData], in_data[deconv::kWeight]}; + } + + std::vector ForwardResource( + const std::vector &in_shape) const override { + return {ResourceRequest::kTempSpace}; + } + + std::vector BackwardResource( + const std::vector &in_shape) const override { + return {ResourceRequest::kTempSpace}; + } + + Operator* CreateOperator(Context ctx) const override; + + private: + DeconvolutionParam param_; +}; // class DeconvolutionProp +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_DECONVOLUTION_INL_H_ diff --git a/src/operator/deconvolution.cc b/src/operator/deconvolution.cc new file mode 100644 index 000000000000..fe5deeafc05b --- /dev/null +++ b/src/operator/deconvolution.cc @@ -0,0 
+1,31 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file deconvolution.cc + * \brief + * \author Wei Wu +*/ + +#include "./deconvolution-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(DeconvolutionParam param) { + return new DeconvolutionOp(param); +} + +Operator* DeconvolutionProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(DeconvolutionParam); + +MXNET_REGISTER_OP_PROPERTY(Deconvolution, DeconvolutionProp) +.add_argument("data", "Symbol", "Input data to the DeconvolutionOp.") +.add_argument("weight", "Symbol", "Weight matrix.") +.add_argument("bias", "Symbol", "Bias parameter.") +.add_arguments(DeconvolutionParam::__FIELDS__()) +.describe("Apply deconvolution to input then add a bias."); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/deconvolution.cu b/src/operator/deconvolution.cu new file mode 100644 index 000000000000..d7662735e89c --- /dev/null +++ b/src/operator/deconvolution.cu @@ -0,0 +1,25 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file deconvolution.cu + * \brief + * \author Wei Wu +*/ + +#include "./deconvolution-inl.h" +#if MXNET_USE_CUDNN == 1 +#include "./cudnn_deconvolution-inl.h" +#endif // MXNET_USE_CUDNN + +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(DeconvolutionParam param) { +#if MXNET_USE_CUDNN == 1 + return new CuDNNDeconvolutionOp(param); +#else + return new DeconvolutionOp(param); +#endif // MXNET_USE_CUDNN +} + +} // namespace op +} // namespace mxnet diff --git a/src/operator/dropout-inl.h b/src/operator/dropout-inl.h index fa76bd38ccf6..1d117bf24c3d 100644 --- a/src/operator/dropout-inl.h +++ b/src/operator/dropout-inl.h @@ -164,7 +164,7 @@ class DropoutProp : public OperatorProperty { return {"output", "mask"}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: DropoutParam param_; diff --git a/src/operator/elementwise_sum-inl.h b/src/operator/elementwise_sum-inl.h index d9c4c0e36206..f763032690d7 100644 --- a/src/operator/elementwise_sum-inl.h +++ b/src/operator/elementwise_sum-inl.h @@ -194,7 +194,7 @@ class ElementWiseSumProp : public OperatorProperty { return {{in_data[0], out_data[0]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: ElementWiseSumParam param_; diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 262aba95d0fb..3454c3498cf9 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -195,7 +195,7 @@ class FullyConnectedProp : public OperatorProperty { return {{in_data[fullc::kData], in_grad[fullc::kData]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: FullyConnectedParam param_; diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h index 4bdb65ef415a..3d4429556877 100644 --- a/src/operator/leaky_relu-inl.h +++ b/src/operator/leaky_relu-inl.h @@ -128,7 +128,6 @@ class LeakyReLUOp : public Operator { const std::vector &aux_args) { using namespace mshadow; using namespace mshadow::expr; - // TODO(bing): double check size_t expected = param_.act_type == leakyrelu::kPReLU ? 
2 : 1; CHECK_EQ(out_grad.size(), 1); CHECK_EQ(req.size(), expected); @@ -141,9 +140,9 @@ class LeakyReLUOp : public Operator { Tensor mask; Tensor weight; Tensor grad_weight; - if (in_data[leakyrelu::kData].ndim() == 2) { - Shape<4> dshape = Shape4(in_data[leakyrelu::kData].shape_[0], - in_data[leakyrelu::kData].shape_[1], 1, 1); + if (out_grad[leakyrelu::kOut].ndim() == 2) { + Shape<4> dshape = Shape4(out_grad[leakyrelu::kOut].shape_[0], + out_grad[leakyrelu::kOut].shape_[1], 1, 1); grad = out_grad[leakyrelu::kOut].get_with_shape(dshape, s); gdata = in_grad[leakyrelu::kData].get_with_shape(dshape, s); output = out_data[leakyrelu::kOut].get_with_shape(dshape, s); @@ -298,8 +297,8 @@ class LeakyReLUProp : public OperatorProperty { return 1; } - virtual std::vector ForwardResource( - const std::vector &in_shape) const { + std::vector ForwardResource( + const std::vector &in_shape) const override { if (param_.act_type == leakyrelu::kRReLU) { return {ResourceRequest::kRandom}; } else { @@ -307,7 +306,7 @@ class LeakyReLUProp : public OperatorProperty { } } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: LeakyReLUParam param_; diff --git a/src/operator/lrn-inl.h b/src/operator/lrn-inl.h index 35aac8fe73ae..40985e7b5112 100644 --- a/src/operator/lrn-inl.h +++ b/src/operator/lrn-inl.h @@ -179,7 +179,7 @@ class LocalResponseNormProp : public OperatorProperty { #endif } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: LRNParam param_; diff --git a/src/operator/native_op-inl.h b/src/operator/native_op-inl.h new file mode 100644 index 000000000000..afe6868aa415 --- /dev/null +++ b/src/operator/native_op-inl.h @@ -0,0 +1,256 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file native_op-inl.h + * \brief + * \author Junyuan Xie +*/ + +#ifndef MXNET_OPERATOR_NATIVE_OP_INL_H_ +#define MXNET_OPERATOR_NATIVE_OP_INL_H_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "./operator_common.h" + +namespace mxnet { +namespace op { + +struct NativeOpParam : public dmlc::Parameter { + void *info; + bool need_top_grad; + + NativeOpInfo *pinfo; + int num_inputs_, num_outputs_; + DMLC_DECLARE_PARAMETER(NativeOpParam) { + DMLC_DECLARE_FIELD(info); + DMLC_DECLARE_FIELD(need_top_grad).set_default(true) + .describe("Whether this layer needs out grad for backward. 
" + "Should be false for loss layers."); + } +}; + +template +class NativeOp : public Operator { + public: + explicit NativeOp(NativeOpParam p) { + this->param_ = p; + } + + virtual void Forward(const OpContext &ctx, + const std::vector &in_data, + const std::vector &req, + const std::vector &out_data, + const std::vector &aux_args) { + using namespace mshadow; + Stream *s = ctx.get_stream(); + ptrs.clear(); + ndims.clear(); + shapes.clear(); + tags.clear(); + SyncVec(in_data, "in_data", s, 0); + SyncVec(out_data, "out_data", s, 1); + s->Wait(); + param_.pinfo->forward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data()); + for (index_t i = 0; i < out_data.size(); ++i) { + CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output"; + if (req[i] != kNullOp) { + std::stringstream ss; + ss << std::string("out_data") << i; + Copy(out_data[i].FlatTo2D(s), + buffer_map[ss.str()].second, s); + } + } + s->Wait(); + } + + virtual void Backward(const OpContext &ctx, + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &req, + const std::vector &in_grad, + const std::vector &aux_args) { + using namespace mshadow; + Stream *s = ctx.get_stream(); + ptrs.clear(); + ndims.clear(); + shapes.clear(); + tags.clear(); + SyncVec(in_data, "in_data", s, 0); + SyncVec(out_data, "out_data", s, 1); + SyncVec(in_grad, "in_grad", s, 2); + if (param_.need_top_grad) { + SyncVec(out_grad, "out_grad", s, 3); + } + s->Wait(); + param_.pinfo->backward(ptrs.size(), ptrs.data(), ndims.data(), shapes.data(), tags.data()); + for (index_t i = 0; i < in_grad.size(); ++i) { + CHECK_NE(req[i], kAddTo) << "NativeOp doesn't support AddTo for output"; + if (req[i] != kNullOp) { + std::stringstream ss; + ss << std::string("in_grad") << i; + Copy(in_grad[i].FlatTo2D(s), + buffer_map[ss.str()].second, s); + } + } + s->Wait(); + } + + private: + NativeOpParam param_; + std::vector ptrs; + std::vector ndims; + std::vector shapes; + std::vector tags; + std::map > > buffer_map; + + virtual void SyncBuffer(const TBlob &tblob, + const std::string &name, + mshadow::Stream *stream) { + using namespace mshadow; + std::map > >::iterator buffer = + buffer_map.find(name); + if (buffer == buffer_map.end() || buffer->second.first != tblob.shape_) { + if (buffer != buffer_map.end()) { + FreeSpace<2, real_t>(&(buffer->second.second)); + buffer_map.erase(buffer); + } + buffer_map[name] = + std::pair >(tblob.shape_, + NewTensor(tblob.shape_.FlatTo2D(), + 0.0f, + false)); + buffer = buffer_map.find(name); + } + Copy(buffer->second.second, tblob.FlatTo2D(stream), stream); + } + + virtual void SyncVec(const std::vector &vec, + const std::string &prefix, + mshadow::Stream *stream, + int tag) { + for (size_t i = 0; i < vec.size(); ++i) { + std::stringstream name; + name << prefix << i; + SyncBuffer(vec[i], name.str(), stream); + ptrs.push_back(buffer_map[name.str()].second.dptr_); + ndims.push_back(vec[i].ndim()); + shapes.push_back(const_cast(vec[i].shape_.data())); + tags.push_back(tag); + } + } +}; // NativeOp + +template +Operator* CreateOp(NativeOpParam param); + +#if DMLC_USE_CXX11 +class NativeOpProp : public OperatorProperty { + public: + std::vector ListArguments() const override { + char ** args = NULL; + param_.pinfo->list_arguments(&args); + std::vector ret; + for (int i = 0; args[i] != NULL; ++i) { + ret.push_back(args[i]); + } + return ret; + } + + std::vector ListOutputs() const override { + char ** args = NULL; + param_.pinfo->list_outputs(&args); + 
std::vector ret; + for (int i = 0; args[i] != NULL; ++i) { + ret.push_back(args[i]); + } + return ret; + } + + int NumOutputs() const override { + return param_.num_outputs_; + } + + void Init(const std::vector >& kwargs) override { + param_.Init(kwargs); + for (auto iter = kwargs.begin(); iter != kwargs.end(); ++iter) { + if (iter->first == "info") { + sscanf(iter->second.c_str(), "%p", &param_.pinfo); + } + } + param_.num_inputs_ = ListArguments().size(); + param_.num_outputs_ = ListOutputs().size(); + } + + std::map GetParams() const override { + return param_.__DICT__(); + } + + + bool InferShape(std::vector *in_shape, + std::vector *out_shape, + std::vector *aux_shape) const override { + std::vector shapes; + std::vector ndims; + for (auto iter = in_shape->begin(); iter != in_shape->end(); ++iter) { + shapes.push_back(iter->data()); + ndims.push_back(iter->ndim()); + } + shapes.resize(param_.num_inputs_+param_.num_outputs_); + ndims.resize(param_.num_inputs_+param_.num_outputs_); + param_.pinfo->infer_shape(shapes.size(), ndims.data(), shapes.data()); + for (unsigned i = 0; i < in_shape->size(); ++i) { + (*in_shape)[i] = TShape(shapes[i], shapes[i]+ndims[i]); + } + for (unsigned i = param_.num_inputs_; i < param_.num_inputs_ + out_shape->size(); ++i) { + (*out_shape)[i-param_.num_inputs_] = TShape(shapes[i], shapes[i]+ndims[i]); + } + return true; + } + + OperatorProperty* Copy() const override { + NativeOpProp *prop_sym = new NativeOpProp(); + prop_sym->param_ = this->param_; + return prop_sym; + } + + std::string TypeString() const override { + return "_Native"; + } + + std::vector DeclareBackwardDependency( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data) const override { + std::vector deps; + if (param_.need_top_grad) { + deps.insert(deps.end(), out_grad.begin(), out_grad.end()); + } + deps.insert(deps.end(), in_data.begin(), in_data.end()); + deps.insert(deps.end(), out_data.begin(), out_data.end()); + return deps; + } + + std::vector > BackwardInplaceOption( + const std::vector &out_grad, + const std::vector &in_data, + const std::vector &out_data, + const std::vector &in_grad) const override { + return {}; + } + + Operator* CreateOperator(Context ctx) const override; + + private: + NativeOpParam param_; +}; // class NativeOpProp +#endif // DMLC_USE_CXX11 +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_NATIVE_OP_INL_H_ diff --git a/src/operator/native_op.cc b/src/operator/native_op.cc new file mode 100644 index 000000000000..7ab0614a041c --- /dev/null +++ b/src/operator/native_op.cc @@ -0,0 +1,27 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file native_op.cc + * \brief + * \author Junyuan Xie +*/ +#include "./native_op-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(NativeOpParam param) { + return new NativeOp(param); +} + +Operator* NativeOpProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(NativeOpParam); + +MXNET_REGISTER_OP_PROPERTY(_Native, NativeOpProp) +.describe("Stub for implementing an operator in a native frontend language.") +.add_arguments(NativeOpParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/native_op.cu b/src/operator/native_op.cu new file mode 100644 index 000000000000..807592626e8b --- /dev/null +++ b/src/operator/native_op.cu @@ -0,0 +1,15 @@ +/*!
+ * Copyright (c) 2015 by Contributors + * \file native_op.cu + * \brief + * \author Junyuan Xie +*/ +#include "./native_op-inl.h" +namespace mxnet { +namespace op { +template<> +Operator* CreateOp(NativeOpParam param) { + return new NativeOp(param); +} +} // namespace op +} // namespace mxnet diff --git a/src/operator/pooling-inl.h b/src/operator/pooling-inl.h index 1f3d76e1ab7a..54808c9bed19 100644 --- a/src/operator/pooling-inl.h +++ b/src/operator/pooling-inl.h @@ -215,7 +215,7 @@ class PoolingProp : public OperatorProperty { #endif } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: PoolingParam param_; diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index 479579d4b472..0366d16e85a8 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -130,7 +130,7 @@ class RegressionOutputProp : public OperatorProperty { return {{in_data[reg_enum::kData], out_data[reg_enum::kOut]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; }; #endif // DMLC_USE_CXX11 } // namespace op diff --git a/src/operator/reshape-inl.h b/src/operator/reshape-inl.h index 12c2071a8c97..915b6938883c 100644 --- a/src/operator/reshape-inl.h +++ b/src/operator/reshape-inl.h @@ -144,7 +144,7 @@ class ReshapeProp : public OperatorProperty { return {{out_grad[reshape_enum::kOut], in_grad[reshape_enum::kData]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; protected: ReshapeParam param_; diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h index 05e3da199bda..6c426ec1bb65 100644 --- a/src/operator/slice_channel-inl.h +++ b/src/operator/slice_channel-inl.h @@ -29,7 +29,7 @@ enum SliceChannelOpOutputs {kOut0, kOut1, kOut2, kOut3, kOut4}; struct SliceChannelParam : public dmlc::Parameter { int num_outputs; DMLC_DECLARE_PARAMETER(SliceChannelParam) { - DMLC_DECLARE_FIELD(num_outputs).set_range(1, 6) + DMLC_DECLARE_FIELD(num_outputs).set_lower_bound(1) .describe("Number of outputs to be sliced."); } }; // struct SliceChannelParam @@ -170,7 +170,7 @@ class SliceChannelProp : public OperatorProperty { return out_grad; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; private: SliceChannelParam param_; diff --git a/src/operator/softmax.cc b/src/operator/softmax.cc deleted file mode 100644 index 2c2516ba9bc9..000000000000 --- a/src/operator/softmax.cc +++ /dev/null @@ -1,29 +0,0 @@ -/*! 
- * Copyright (c) 2015 by Contributors - * \file softmax.cc - * \brief - * \author Bing Xu -*/ -#include "./softmax-inl.h" - -namespace mxnet { -namespace op { -template<> -Operator *CreateOp(SoftmaxParam param) { - return new SoftmaxOp(param); -} - -Operator *SoftmaxProp::CreateOperator(Context ctx) const { - DO_BIND_DISPATCH(CreateOp, param_); -} - -DMLC_REGISTER_PARAMETER(SoftmaxParam); - -MXNET_REGISTER_OP_PROPERTY(Softmax, SoftmaxProp) -.describe("Perform a softmax transformation on input.") -.add_argument("data", "Symbol", "Input data to softmax.") -.add_arguments(SoftmaxParam::__FIELDS__()); - -} // namespace op -} // namespace mxnet - diff --git a/src/operator/softmax-inl.h b/src/operator/softmax_output-inl.h similarity index 57% rename from src/operator/softmax-inl.h rename to src/operator/softmax_output-inl.h index d1e5331d9d06..9528ed0a41c6 100644 --- a/src/operator/softmax-inl.h +++ b/src/operator/softmax_output-inl.h @@ -1,11 +1,11 @@ /*! * Copyright (c) 2015 by Contributors - * \file softmax-inl.h + * \file softmax_output-inl.h * \brief * \author Bing Xu */ -#ifndef MXNET_OPERATOR_SOFTMAX_INL_H_ -#define MXNET_OPERATOR_SOFTMAX_INL_H_ +#ifndef MXNET_OPERATOR_SOFTMAX_OUTPUT_INL_H_ +#define MXNET_OPERATOR_SOFTMAX_OUTPUT_INL_H_ #include #include @@ -20,15 +20,15 @@ namespace mxnet { namespace op { -namespace softmax_enum { -enum SoftmaxOpInputs {kData, kLabel}; -enum SoftmaxOpOutputs {kOut}; -} // namespace softmax_enum +namespace softmaxout_enum { +enum SoftmaxOutputOpInputs {kData, kLabel}; +enum SoftmaxOutputOpOutputs {kOut}; +} // namespace softmaxout_enum -struct SoftmaxParam : public dmlc::Parameter { +struct SoftmaxOutputParam : public dmlc::Parameter { float grad_scale; bool multi_output; - DMLC_DECLARE_PARAMETER(SoftmaxParam) { + DMLC_DECLARE_PARAMETER(SoftmaxOutputParam) { DMLC_DECLARE_FIELD(grad_scale).set_default(1.0f) .describe("Scale the gradient by a float factor"); DMLC_DECLARE_FIELD(multi_output).set_default(false) @@ -39,9 +39,9 @@ struct SoftmaxParam : public dmlc::Parameter { }; template -class SoftmaxOp : public Operator { +class SoftmaxOutputOp : public Operator { public: - explicit SoftmaxOp(SoftmaxParam param) : param_(param) {} + explicit SoftmaxOutputOp(SoftmaxOutputParam param) : param_(param) {} virtual void Forward(const OpContext &ctx, const std::vector &in_data, @@ -50,19 +50,19 @@ class SoftmaxOp : public Operator { const std::vector &aux_args) { using namespace mshadow; using namespace mshadow::expr; - CHECK_EQ(in_data.size(), 2) << "Softmax Input: [data, label]"; - CHECK_EQ(out_data.size(), 1) << "Softmax Output: [output]"; + CHECK_EQ(in_data.size(), 2) << "SoftmaxOutput Input: [data, label]"; + CHECK_EQ(out_data.size(), 1) << "SoftmaxOutput Output: [output]"; Stream *s = ctx.get_stream(); if (param_.multi_output) { - int n = in_data[softmax_enum::kData].size(0); - int k = in_data[softmax_enum::kData].size(1); - Shape<3> s3 = Shape3(n, k, static_cast(in_data[softmax_enum::kData].Size()/n/k)); - Tensor data = in_data[softmax_enum::kData].get_with_shape(s3, s); - Tensor out = out_data[softmax_enum::kOut].get_with_shape(s3, s); + int n = in_data[softmaxout_enum::kData].size(0); + int k = in_data[softmaxout_enum::kData].size(1); + Shape<3> s3 = Shape3(n, k, static_cast(in_data[softmaxout_enum::kData].Size()/n/k)); + Tensor data = in_data[softmaxout_enum::kData].get_with_shape(s3, s); + Tensor out = out_data[softmaxout_enum::kOut].get_with_shape(s3, s); Softmax(out, data); } else { - Tensor data = in_data[softmax_enum::kData].FlatTo2D(s); - Tensor out 
= out_data[softmax_enum::kOut].FlatTo2D(s); + Tensor data = in_data[softmaxout_enum::kData].FlatTo2D(s); + Tensor out = out_data[softmaxout_enum::kOut].FlatTo2D(s); Softmax(out, data); } } @@ -82,20 +82,20 @@ class SoftmaxOp : public Operator { CHECK_GE(req.size(), 1); Stream *s = ctx.get_stream(); if (param_.multi_output) { - int n = out_data[softmax_enum::kOut].size(0); - int k = out_data[softmax_enum::kOut].size(1); - Shape<3> s3 = Shape3(n, k, static_cast(out_data[softmax_enum::kOut].Size()/n/k)); - Tensor label = in_data[softmax_enum::kLabel].FlatTo2D(s); - Tensor out = out_data[softmax_enum::kOut].get_with_shape(s3, s); - Tensor grad = in_grad[softmax_enum::kData].get_with_shape(s3, s); + int n = out_data[softmaxout_enum::kOut].size(0); + int k = out_data[softmaxout_enum::kOut].size(1); + Shape<3> s3 = Shape3(n, k, static_cast(out_data[softmaxout_enum::kOut].Size()/n/k)); + Tensor label = in_data[softmaxout_enum::kLabel].FlatTo2D(s); + Tensor out = out_data[softmaxout_enum::kOut].get_with_shape(s3, s); + Tensor grad = in_grad[softmaxout_enum::kData].get_with_shape(s3, s); SoftmaxGrad(grad, out, label); if (param_.grad_scale < 1.0) { grad *= param_.grad_scale; } } else { - Tensor label = in_data[softmax_enum::kLabel].get(s); - Tensor out = out_data[softmax_enum::kOut].FlatTo2D(s); - Tensor grad = in_grad[softmax_enum::kData].FlatTo2D(s); + Tensor label = in_data[softmaxout_enum::kLabel].get(s); + Tensor out = out_data[softmaxout_enum::kOut].FlatTo2D(s); + Tensor grad = in_grad[softmaxout_enum::kData].FlatTo2D(s); SoftmaxGrad(grad, out, label); if (param_.grad_scale < 1.0) { grad *= param_.grad_scale; @@ -104,15 +104,15 @@ class SoftmaxOp : public Operator { } private: - SoftmaxParam param_; -}; // class SoftmaxOp + SoftmaxOutputParam param_; +}; // class SoftmaxOutputOp // Decalre Factory function, used for dispatch specialization template -Operator* CreateOp(SoftmaxParam param); +Operator* CreateOp(SoftmaxOutputParam param); #if DMLC_USE_CXX11 -class SoftmaxProp : public OperatorProperty { +class SoftmaxOutputProp : public OperatorProperty { public: std::vector ListArguments() const override { return {"data", "label"}; @@ -134,10 +134,10 @@ class SoftmaxProp : public OperatorProperty { const TShape &dshape = in_shape->at(0); if (dshape.ndim() == 0) return false; if (param_.multi_output) { - SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel, + SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1])); } else { - SHAPE_ASSIGN_CHECK(*in_shape, softmax_enum::kLabel, Shape1(dshape[0])); + SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, Shape1(dshape[0])); } out_shape->clear(); out_shape->push_back(dshape); @@ -145,20 +145,20 @@ class SoftmaxProp : public OperatorProperty { } OperatorProperty* Copy() const override { - auto ptr = new SoftmaxProp(); + auto ptr = new SoftmaxOutputProp(); ptr->param_ = param_; return ptr; } std::string TypeString() const override { - return "Softmax"; + return "SoftmaxOutput"; } std::vector DeclareBackwardDependency( const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data) const override { - return {in_data[softmax_enum::kLabel], out_data[softmax_enum::kOut]}; + return {in_data[softmaxout_enum::kLabel], out_data[softmaxout_enum::kOut]}; } std::vector > BackwardInplaceOption( @@ -166,22 +166,35 @@ class SoftmaxProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data, const std::vector &in_grad) const override { - return 
{{out_data[softmax_enum::kOut], in_grad[softmax_enum::kData]}}; + return {{out_data[softmaxout_enum::kOut], in_grad[softmaxout_enum::kData]}}; } std::vector > ForwardInplaceOption( const std::vector &in_data, const std::vector &out_data) const override { - return {{in_data[softmax_enum::kData], out_data[softmax_enum::kOut]}}; + return {{in_data[softmaxout_enum::kData], out_data[softmaxout_enum::kOut]}}; } - Operator* CreateOperator(Context ctx) const; + Operator* CreateOperator(Context ctx) const override; - private: - SoftmaxParam param_; -}; // class SoftmaxProp + protected: + SoftmaxOutputParam param_; +}; // class SoftmaxOutputProp + +class DeprecatedSoftmaxProp : public SoftmaxOutputProp { + public: + void Init(const std::vector >& kwargs) override { + LOG(INFO) << "Softmax symbol is renamed to SoftmaxOutput. " + << "This API will be deprecated in Dec, 2015"; + SoftmaxOutputProp::param_.Init(kwargs); + } + + std::string TypeString() const override { + return "Softmax"; + } +}; #endif // DMLC_USE_CXX11 } // namespace op } // namespace mxnet -#endif // MXNET_OPERATOR_SOFTMAX_INL_H_ +#endif // MXNET_OPERATOR_SOFTMAX_OUTPUT_INL_H_ diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc new file mode 100644 index 000000000000..bc1ba367ccaa --- /dev/null +++ b/src/operator/softmax_output.cc @@ -0,0 +1,34 @@ +/*! + * Copyright (c) 2015 by Contributors + * \file softmax_output.cc + * \brief + * \author Bing Xu +*/ +#include "./softmax_output-inl.h" + +namespace mxnet { +namespace op { +template<> +Operator *CreateOp(SoftmaxOutputParam param) { + return new SoftmaxOutputOp(param); +} + +Operator *SoftmaxOutputProp::CreateOperator(Context ctx) const { + DO_BIND_DISPATCH(CreateOp, param_); +} + +DMLC_REGISTER_PARAMETER(SoftmaxOutputParam); + +MXNET_REGISTER_OP_PROPERTY(SoftmaxOutput, SoftmaxOutputProp) +.describe("Perform a softmax transformation on input, backprop with logloss.") +.add_argument("data", "Symbol", "Input data to softmax.") +.add_arguments(SoftmaxOutputParam::__FIELDS__()); + +MXNET_REGISTER_OP_PROPERTY(Softmax, DeprecatedSoftmaxProp) +.describe("DEPRECATED: Perform a softmax transformation on input. Please use SoftmaxOutput") +.add_argument("data", "Symbol", "Input data to softmax.") +.add_arguments(SoftmaxOutputParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet + diff --git a/src/operator/softmax.cu b/src/operator/softmax_output.cu similarity index 51% rename from src/operator/softmax.cu rename to src/operator/softmax_output.cu index 0ebbfc16ce68..02b92c041c47 100644 --- a/src/operator/softmax.cu +++ b/src/operator/softmax_output.cu @@ -1,17 +1,17 @@ /*! 
* Copyright (c) 2015 by Contributors - * \file softmax.cu + * \file softmax_output.cu * \brief * \author Bing Xu */ -#include "./softmax-inl.h" +#include "./softmax_output-inl.h" namespace mxnet { namespace op { template<> -Operator *CreateOp(SoftmaxParam param) { - return new SoftmaxOp(param); +Operator *CreateOp(SoftmaxOutputParam param) { + return new SoftmaxOutputOp(param); } } // namespace op diff --git a/tests/python/common/models.py b/tests/python/common/models.py index 71df3f07cf47..2c998afcd1db 100644 --- a/tests/python/common/models.py +++ b/tests/python/common/models.py @@ -24,6 +24,6 @@ def conv(): fl = mx.symbol.Flatten(data = mp2, name="flatten") fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10) - softmax = mx.symbol.Softmax(data = fc2, name = 'sm') + softmax = mx.symbol.SoftmaxOutput(data = fc2, name = 'sm') return softmax diff --git a/tests/python/multi-node/common.py b/tests/python/multi-node/common.py index 0db092462a78..d35a1a1fe3f4 100644 --- a/tests/python/multi-node/common.py +++ b/tests/python/multi-node/common.py @@ -86,7 +86,7 @@ def mlp(): fc2 = mx.symbol.FullyConnected(act1, name = 'fc2', num_hidden = 64) act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10) - softmax = mx.symbol.Softmax(fc3, name = 'sm') + softmax = mx.symbol.SoftmaxOutput(fc3, name = 'sm') return softmax def lenet(): @@ -109,7 +109,7 @@ def lenet(): # second fullc fc2 = mx.symbol.FullyConnected(data=tanh3, num_hidden=10) # loss - lenet = mx.symbol.Softmax(data=fc2) + lenet = mx.symbol.SoftmaxOutput(data=fc2) return lenet # Basic Conv + BN + ReLU factory @@ -155,5 +155,5 @@ def inception(): pool = mx.symbol.Pooling(data=in5b, pool_type="avg", kernel=(7,7), name="global_pool") flatten = mx.symbol.Flatten(data=pool, name="flatten1") fc = mx.symbol.FullyConnected(data=flatten, num_hidden=10, name="fc1") - softmax = mx.symbol.Softmax(data=fc, name="loss") + softmax = mx.symbol.SoftmaxOutput(data=fc, name="loss") return softmax diff --git a/tests/python/multi-node/imagenet.py b/tests/python/multi-node/imagenet.py index 7663df8d1bad..f4d7c1e35bb3 100644 --- a/tests/python/multi-node/imagenet.py +++ b/tests/python/multi-node/imagenet.py @@ -97,5 +97,5 @@ def inception(nhidden): # linear classifier flatten = mx.symbol.Flatten(data=avg, name='flatten') fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=nhidden, name='fc1') - softmax = mx.symbol.Softmax(data=fc1, name='softmax') + softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') return softmax diff --git a/tests/python/train/test_conv.py b/tests/python/train/test_conv.py index bc068153c24e..9d8f77fd7c65 100644 --- a/tests/python/train/test_conv.py +++ b/tests/python/train/test_conv.py @@ -24,7 +24,7 @@ fl = mx.symbol.Flatten(data = mp2, name="flatten") fc2 = mx.symbol.FullyConnected(data = fl, name='fc2', num_hidden=10) -softmax = mx.symbol.Softmax(data = fc2, name = 'sm') +softmax = mx.symbol.SoftmaxOutput(data = fc2, name = 'sm') num_epoch = 1 model = mx.model.FeedForward(softmax, mx.cpu(), diff --git a/tests/python/train/test_mlp.py b/tests/python/train/test_mlp.py index 5f1c27062066..84a6f17f47d5 100644 --- a/tests/python/train/test_mlp.py +++ b/tests/python/train/test_mlp.py @@ -14,7 +14,7 @@ fc2 = mx.symbol.FullyConnected(act1, name = 'fc2', num_hidden = 64) act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu") fc3 = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10) -softmax = mx.symbol.Softmax(fc3, name = 'sm') +softmax =
mx.symbol.SoftmaxOutput(fc3, name = 'sm') def accuracy(label, pred): py = np.argmax(pred, axis=1) diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index c00350a3ad28..fca0093a09c9 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -77,7 +77,7 @@ def test_ndarray_choose(): for repeat in range(nrepeat): indices = np.random.randint(shape[1], size=shape[0]) assert same(npy[np.arange(shape[0]), indices], - mx.nd.choose_element(arr, mx.nd.array(indices)).asnumpy()) + mx.nd.choose_element_0index(arr, mx.nd.array(indices)).asnumpy()) def test_ndarray_choose(): diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index b0743a6f0bb6..c767648f1ece 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -49,45 +49,34 @@ def test_elementwise_sum(): shape = tuple(np.random.randint(1, int(1000**(1.0/dim)), size=dim)) check_elementwise_sum_with_shape(shape, np.random.randint(1, 8)) -def check_slice_channel(dim): +def check_slice_channel(dim, num): + ins = [] if dim == 2: - a = np.ones((2, 2)) * 1. - b = np.ones((2, 2)) * 2. - c = np.ones((2, 2)) * 3. - d = np.ones((2, 2)) * 4. - e = np.hstack((a, b, c, d)) - elif dim == 4: - a = np.ones((2, 2, 2, 2)) * 1. - b = np.ones((2, 2, 2, 2)) * 2. - c = np.ones((2, 2, 2, 2)) * 3. - d = np.ones((2, 2, 2, 2)) * 4. - e = np.hstack((a, b, c, d)) + shape = (2, 2) + else: + shape = (2, 2, 2, 3) + ins = [np.ones(shape) * i for i in range(num)] + e = np.hstack(ins) + + e_nd = mx.nd.empty(e.shape) e_nd[:] = e data = mx.sym.Variable('data') - op = mx.sym.SliceChannel(data=data, num_outputs=4) + op = mx.sym.SliceChannel(data=data, num_outputs=num) arg_shape, output_shape, aux_shape = op.infer_shape(data=e_nd.shape) grad_nd = [mx.nd.empty(shape) for shape in arg_shape] exe = op.bind(mx.cpu(), args=[e_nd], args_grad=grad_nd) - assert len(exe.outputs) == 4 - o1_nd = exe.outputs[0] - o2_nd = exe.outputs[1] - o3_nd = exe.outputs[2] - o4_nd = exe.outputs[3] + assert len(exe.outputs) == num + o_nd = [exe.outputs[i] for i in range(num)] # test forward exe.forward() - assert reldiff(o1_nd.asnumpy(), a) < 1e-5 - assert reldiff(o2_nd.asnumpy(), b) < 1e-5 - assert reldiff(o3_nd.asnumpy(), c) < 1e-5 - assert reldiff(o4_nd.asnumpy(), d) < 1e-5 + for i in range(num): + assert reldiff(o_nd[i].asnumpy(), ins[i]) < 1e-5 # test backward - o1_nd += 4. - o2_nd += 3. - o3_nd += 2. - o4_nd += 1. 
- exe.backward([o1_nd, o2_nd, o3_nd, o4_nd]) - assert reldiff(grad_nd[0].asnumpy(), np.hstack((a+4,b+3, c+2, d+1))) < 1e-5 + for i in range(num): + o_nd[i] += i + exe.backward(o_nd) + assert reldiff(grad_nd[0].asnumpy(), np.hstack([ins[i] + i for i in range(num)])) < 1e-5 def check_concat_with_shape(shapes): n = len(shapes) @@ -140,8 +129,9 @@ def test_concat(): check_concat_with_shape(shapes) def test_slice_channel(): - check_slice_channel(2) - check_slice_channel(4) + check_slice_channel(2, 4) + check_slice_channel(4, 4) + check_slice_channel(2, 16) def check_regression(symbol, forward, backward): data = mx.symbol.Variable('data') @@ -202,10 +192,25 @@ def check_multi_softmax_with_shape(shape, xpu): exec1.backward() print(grad.asnumpy()) +def test_python_op(): + X = mx.symbol.Variable('X') + op = mx.operator.NumpyOp() + s = op.get_symbol(X, name='numpy_op') + + x = mx.ndarray.ones((10))*10 + dx = mx.ndarray.zeros((10)) + dy = mx.ndarray.ones((10)) + exec1 = s.bind(mx.cpu(), args=[x], args_grad = {'X': dx}) + exec1.forward() + assert reldiff(x.asnumpy(), exec1.outputs[0].asnumpy()) < 1e-5 + exec1.backward(dy) + assert reldiff(dy.asnumpy(), dx.asnumpy()) < 1e-5 + if __name__ == '__main__': test_elementwise_sum() test_concat() test_slice_channel() test_regression() + test_python_op() #check_softmax_with_shape((3,4), mx.cpu()) #check_multi_softmax_with_shape((3,4,5), mx.cpu()) diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index a0ebcd9edfcd..451f2e272a50 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -45,6 +45,30 @@ if [ ${TASK} == "cpp_test" ]; then exit 0 fi +if [ ${TASK} == "r_test" ]; then + make all || exit -1 + # use cached dir for storing data + rm -rf ${PWD}/data + mkdir -p ${CACHE_PREFIX}/data + ln -s ${CACHE_PREFIX}/data ${PWD}/data + + set -e + export _R_CHECK_TIMINGS_=0 + export R_BUILD_ARGS="--no-build-vignettes --no-manual" + export R_CHECK_ARGS="--no-vignettes --no-manual" + + curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh + chmod 755 ./travis-tool.sh + ./travis-tool.sh bootstrap + ./travis-tool.sh install_aptget r-cran-testthat r-cran-Rcpp r-cran-DiagrammeR r-cran-data.table r-cran-jsonlite r-cran-magrittr r-cran-stringr + + R CMD INSTALL R-package + cd ./R-package + ../travis-tool.sh install_deps + ../travis-tool.sh run_tests + exit 0 +fi + if [ ${TASK} == "python_test" ]; then make all || exit -1 # use cached dir for storing data diff --git a/tests/travis/travis_after_failure.sh b/tests/travis/travis_after_failure.sh new file mode 100644 index 000000000000..ad9616edd94c --- /dev/null +++ b/tests/travis/travis_after_failure.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +if [ ${TASK} == "r_test" ]; then + cat mxnet/mxnet.Rcheck/*.log +fi \ No newline at end of file diff --git a/tools/caffe_converter/convert_model.py b/tools/caffe_converter/convert_model.py index 7f362dbbe06d..87c51b82692a 100644 --- a/tools/caffe_converter/convert_model.py +++ b/tools/caffe_converter/convert_model.py @@ -51,10 +51,10 @@ def main(): first_conv = False model = mx.model.FeedForward(ctx=mx.cpu(), symbol=prob, - arg_params=arg_params, aux_params={}, num_round=1, + arg_params=arg_params, aux_params={}, num_epoch=1, learning_rate=0.05, momentum=0.9, wd=0.0001) model.save(args.save_model_name) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/tools/caffe_converter/convert_symbol.py b/tools/caffe_converter/convert_symbol.py index 9b5bcde99848..88acea0b7c4e 100644 --- 
a/tools/caffe_converter/convert_symbol.py +++ b/tools/caffe_converter/convert_symbol.py @@ -41,7 +41,7 @@ def proto2script(proto_file): name = layer[i].name.replace('/', '_') if layer[i].type == 'Convolution' or layer[i].type == 4: type_string = 'mx.symbol.Convolution' - param = layer[i].convolution_param + param = layer[i].convolution_param pad = 0 if len(param.pad) == 0 else param.pad[0] stride = 1 if len(param.stride) == 0 else param.stride[0] param_string = "num_filter=%d, pad=(%d,%d), kernel=(%d,%d), stride=(%d,%d), no_bias=%s" %\ @@ -67,7 +67,7 @@ def proto2script(proto_file): need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'LRN' or layer[i].type == 15: type_string = 'mx.symbol.LRN' - param = layer[i].lrn_param + param = layer[i].lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" %\ (param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True @@ -82,7 +82,7 @@ def proto2script(proto_file): param_string = "p=%f" % param.dropout_ratio need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] if layer[i].type == 'Softmax' or layer[i].type == 20: - type_string = 'mx.symbol.Softmax' + type_string = 'mx.symbol.SoftmaxOutput' # We only support single output network for now. output_name = name @@ -96,7 +96,7 @@ def proto2script(proto_file): need_flatten[name] = True if type_string == '': raise Exception('Unknown Layer %s!' % layer[i].type) - + if type_string != 'split': bottom = layer[i].bottom if param_string != "": @@ -137,4 +137,4 @@ def main(): print(symbol_string) if __name__ == '__main__': - main() \ No newline at end of file + main()
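
Migration note: after this patch, mx.symbol.Softmax survives only as a deprecated alias backed by DeprecatedSoftmaxProp; new code should call mx.symbol.SoftmaxOutput, as the updated tests and the Caffe converter above now do. A minimal sketch of the renamed API in user code follows; the layer names and sizes are illustrative, not taken from this patch:

    import mxnet as mx

    # Toy two-layer classifier; SoftmaxOutput replaces the old mx.symbol.Softmax.
    data = mx.symbol.Variable('data')
    fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
    act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type='relu')
    fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=10)
    # The old spelling mx.symbol.Softmax(...) still resolves through the
    # DeprecatedSoftmaxProp shim, but logs an INFO message pointing to the new name.
    net = mx.symbol.SoftmaxOutput(data=fc2, name='sm')

The shim keeps existing training scripts running while the log message gives users until Dec. 2015 to migrate.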