From 9de9af38fbc3c578b9b39249147c784f8a1fdef9 Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Sun, 20 Dec 2015 22:48:38 +0800 Subject: [PATCH 01/27] add SGD optimizer but not tested yet --- .../scala/ml/dmlc/mxnet/LRScheduler.scala | 5 ++ .../main/scala/ml/dmlc/mxnet/Optimizer.scala | 29 +++++++++- .../scala/ml/dmlc/mxnet/optimizer/SGD.scala | 56 +++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala create mode 100644 scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala new file mode 100644 index 000000000000..c782897b0f66 --- /dev/null +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -0,0 +1,5 @@ +package ml.dmlc.mxnet + +class LRScheduler { + def apply(numUpdate: Int): Float = ??? +} diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala index 0d0cd38d6638..f9a58f5ca4db 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/Optimizer.scala @@ -1,9 +1,11 @@ package ml.dmlc.mxnet +import scala.collection.mutable + object Optimizer { def getUpdater(optimizer: Optimizer): MXKVStoreUpdater = { new MXKVStoreUpdater { - private val states = new scala.collection.mutable.HashMap[Int, AnyRef] + val states = new scala.collection.mutable.HashMap[Int, AnyRef] override def update(index: Int, grad: NDArray, weight: NDArray, handle: AnyRef): Unit = { val state = states.getOrElseUpdate(index, optimizer.createState(index, weight)) optimizer.update(index, weight, grad, state) @@ -12,7 +14,11 @@ object Optimizer { } } -abstract class Optimizer extends Serializable { +abstract class Optimizer(protected var rescaleGrad: Float = 1f) extends Serializable { + protected var lrScale: mutable.Map[Int, Float] = mutable.HashMap.empty[Int, Float] + protected var numUpdate: Int = 0 + protected val indexUpdateCount: mutable.Map[Int, Int] = mutable.HashMap.empty[Int, Int] + /** * Update the parameters. * @param index An unique integer key used to index the parameters @@ -21,10 +27,27 @@ abstract class Optimizer extends Serializable { * @param state NDArray or other objects returned by initState * The auxiliary state used in optimization. */ - def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = ??? + // TODO: make state a ClassTag + def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit // Create additional optimizer state such as momentum. 
+  // TODO: make returned state a ClassTag
   def createState(index: Int, weight: NDArray): AnyRef
+
+  // Set individual learning rate scale for parameters
+  def setLrScale(lrScale: Map[Int, Float]) {
+    this.lrScale = mutable.Map(lrScale.toSeq: _*)
+  }
+
+  /**
+   * update num_update
+   * @param index The index will be updated
+   */
+  protected def updateCount(index: Int): Unit = {
+    val count = indexUpdateCount.getOrElseUpdate(index, 0) + 1
+    indexUpdateCount.update(index, count)
+    numUpdate = Math.max(count, numUpdate)
+  }
 }
 
 trait MXKVStoreUpdater {
diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala
new file mode 100644
index 000000000000..fa89c873c654
--- /dev/null
+++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala
@@ -0,0 +1,56 @@
+package ml.dmlc.mxnet.optimizer
+
+import ml.dmlc.mxnet.{Optimizer, LRScheduler, NDArray}
+import ml.dmlc.mxnet.NDArrayConversions._
+
+/**
+ * A very simple SGD optimizer with momentum and weight regularization.
+ * @author Yizhi Liu
+ */
+class SGD(val learningRate: Float = 0.01f, val momentum: Float = 0.0f,
+          val wd: Float = 0.0001f, rescaleGrad: Float = 1f, val clipGradient: Float = 0f,
+          val lrScheduler: LRScheduler = null) extends Optimizer(rescaleGrad: Float) {
+  /**
+   * Update the parameters.
+   * @param index An unique integer key used to index the parameters
+   * @param weight weight ndarray
+   * @param grad grad ndarray
+   * @param state NDArray or other objects returned by initState
+   *              The auxiliary state used in optimization.
+   */
+  override def update(index: Int, weight: NDArray, grad: NDArray, state: AnyRef): Unit = {
+    // TODO(bing) implement wd_bias, wd_gamma, wd_beta (copy from python package)
+    val lr =
+      (if (lrScheduler != null) {
+        val scheduledLr = lrScheduler(numUpdate)
+        updateCount(index)
+        scheduledLr
+      } else {
+        this.learningRate
+      }) * lrScale.getOrElse(index, 1f)
+
+    var resdGrad = grad * rescaleGrad
+    if (clipGradient != 0f) {
+      resdGrad = NDArray._genericNDArrayFunction(
+        "clip", Array(resdGrad, -clipGradient, clipGradient))(0)
+    }
+    if (state != null) {
+      val mom = state.asInstanceOf[NDArray]
+      mom *= momentum
+      mom += -lr * (grad + wd * weight)
+      weight += mom
+    } else {
+      require(momentum == 0f)
+      weight += -lr * (grad + wd * weight)
+    }
+  }
+
+  // Create additional optimizer state such as momentum.
+ override def createState(index: Int, weight: NDArray): AnyRef = { + if (momentum == 0.0f) { + null + } else { + NDArray.zeros(weight.shape, weight.context) + } + } +} From 8846e7ab1fbe25a22127f988dc1c247d1e3a3adc Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 9 Dec 2015 14:48:04 -0800 Subject: [PATCH 02/27] [EXECUTOR] Experimental Mirror option to squeez mem --- src/symbol/graph_executor.cc | 57 ++++++++++++++++------- src/symbol/graph_executor.h | 2 + src/symbol/static_graph.cc | 89 ++++++++++++++++++++++++++++++------ src/symbol/static_graph.h | 9 ++-- src/symbol/symbol.cc | 5 +- 5 files changed, 129 insertions(+), 33 deletions(-) diff --git a/src/symbol/graph_executor.cc b/src/symbol/graph_executor.cc index 54eb7355924b..6a412a318d35 100644 --- a/src/symbol/graph_executor.cc +++ b/src/symbol/graph_executor.cc @@ -278,7 +278,13 @@ void GraphExecutor::InitGraph(const Symbol &symbol, // initialize all internal data structures graph_.FromSymbol(symbol); if (need_backward) { - graph_.MakeBackwardPass(&head_grad_nodes_, &arg_grads_); + std::map mirror; + graph_.MakeBackwardPass(&head_grad_nodes_, &arg_grads_, &mirror); + for (auto kv : mirror) { + if (kv.first != kv.second) { + mirror_source_map_[kv.second] = kv.first; + } + } } // assign context, this will change the graph. std::vector ctx_assignment; @@ -293,19 +299,33 @@ void GraphExecutor::InitGraph(const Symbol &symbol, } std::sort(head_nodes.begin(), head_nodes.end()); head_nodes.resize(std::unique(head_nodes.begin(), head_nodes.end()) - head_nodes.begin()); - std::vector fwd_nodes = graph_.PostDFSOrder(head_nodes); + std::vector fwd_nodes = graph_.PostDFSOrder(head_nodes, {}); + num_forward_nodes_ = fwd_nodes.size(); + std::unordered_set fwd_set(fwd_nodes.begin(), fwd_nodes.end()); std::vector topo = graph_.TopoSort(); std::vector backward; + for (uint32_t nid : topo) { if (fwd_set.count(nid) != 0) { topo_order_.push_back(nid); } else { - backward.push_back(nid); + // TODO(tqchen) find less hacky way to decide mirror node. 
+ const std::string& name = graph_.nodes[nid].name; + bool is_mirror = graph_.nodes[nid].is_forward() && + name.substr(name.length() - 7, 7) == "_mirror"; + if (!is_mirror) backward.push_back(nid); } } - num_forward_nodes_ = fwd_nodes.size(); - topo_order_.insert(topo_order_.end(), backward.begin(), backward.end()); + std::unordered_set finished(fwd_nodes.begin(), fwd_nodes.end()); + for (uint32_t nid : backward) { + std::vector pass = graph_.PostDFSOrder({nid}, finished); + topo_order_.insert(topo_order_.end(), pass.begin(), pass.end()); + finished.insert(pass.begin(), pass.end()); + } + for (uint32_t nid : topo) { + if (finished.count(nid) == 0) topo_order_.push_back(nid); + } // setup all the operator nodes data structure op_nodes_.resize(graph_.nodes.size()); @@ -506,27 +526,32 @@ void GraphExecutor::InitDataEntryInfo(const std::vector &in_args, op_nodes_[i].outputs[j].shape = out_shapes[i][j]; } } + // bind aux args size_t aux_ndarray_idx = 0; - for (size_t i = 0; i < aux_shapes.size(); ++i) { + for (auto i : topo_order_) { op_nodes_[i].aux_states.resize(aux_shapes[i].size()); for (size_t j = 0; j < aux_shapes[i].size(); ++j) { DataEntryInfo &info = op_nodes_[i].aux_states[j]; info.shape = aux_shapes[i][j]; info.type = kBindByExternal; - if (graph_.nodes[i].backward_source_id == -1) { - info.data = aux_states[aux_ndarray_idx++]; - CHECK(info.data.ctx() == op_nodes_[i].ctx) - << "Auxiliary NDArray's context must match the operator's context assignment"; + if (mirror_source_map_.count(i) == 0) { + if (graph_.nodes[i].backward_source_id == -1) { + info.data = aux_states[aux_ndarray_idx++]; + CHECK(info.data.ctx() == op_nodes_[i].ctx) + << "Auxiliary NDArray's context must match the operator's context assignment"; + } else { + CHECK_NE(graph_.nodes[i].backward_source_id, -1) + << "Input auxiliary NDArray is less than required"; + info.data = op_nodes_[graph_.nodes[i].backward_source_id].aux_states[j].data; + } } else { - CHECK_NE(graph_.nodes[i].backward_source_id, -1) - << "Input auxiliary NDArray is less than required"; - info.data = op_nodes_[graph_.nodes[i].backward_source_id].aux_states[j].data; + info.data = op_nodes_[mirror_source_map_[i]].aux_states[j].data; } CHECK_EQ(info.data.data().shape_, info.shape) - << "Incorrect NDArray shape" - << " Input: " << info.data.data().shape_ - << " Desired: " << info.shape; + << "Incorrect NDArray shape" + << " Input: " << info.data.data().shape_ + << " Desired: " << info.shape; } } } diff --git a/src/symbol/graph_executor.h b/src/symbol/graph_executor.h index aa93d802598c..1ec47294a124 100644 --- a/src/symbol/graph_executor.h +++ b/src/symbol/graph_executor.h @@ -223,6 +223,8 @@ class GraphExecutor : public Executor { size_t num_forward_nodes_; // head gradient node in the graph, if there is backward pass std::vector head_grad_nodes_; + // mirror map of nodes, experimental feature, normally can be ignored. 
+ std::map mirror_source_map_; // argument node in the graph, if there is backward pass std::vector arg_grads_; // operational nodes diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index 7648ae22b7bf..ca40a007c64a 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -13,14 +13,18 @@ namespace mxnet { -std::vector StaticGraph::PostDFSOrder(const std::vector& head_nodes) const { +std::vector StaticGraph::PostDFSOrder(const std::vector& head_nodes, + const std::unordered_set& banned) const { std::vector ret; + std::unordered_set visited; ret.reserve(nodes.size() / 2); std::vector > stack; - std::unordered_set visited; // heads for (auto &head : head_nodes) { stack.push_back(std::make_pair(head, 0)); + CHECK_EQ(banned.count(head), 0); + // bugfix + visited.insert(head); } while (!stack.empty()) { std::pair& back = stack.back(); @@ -37,7 +41,7 @@ std::vector StaticGraph::PostDFSOrder(const std::vector& hea } else { input = n.inputs[back.second++].source_id; } - if (visited.count(input) == 0) { + if (visited.count(input) == 0 && banned.count(input) == 0) { stack.push_back(std::make_pair(input, 0)); } } @@ -62,7 +66,7 @@ std::vector StaticGraph::TopoSort() const { head_nodes.push_back(static_cast(i)); } } - return PostDFSOrder(head_nodes); + return PostDFSOrder(head_nodes, {}); } bool StaticGraph::InferNodeShapes(const std::vector &topo_order, @@ -186,10 +190,22 @@ bool StaticGraph::InferShape(std::vector *in_shape, const DataEntry &e = heads[i]; (*out_shape)[i] = node_out_shapes[e.source_id][e.index]; } + + // set back auxiliary nodes. aux_shape->clear(); - for (size_t i = 0; i < node_aux_shapes.size(); ++i) { - if (node_aux_shapes[i].size() > 0) { - for (auto const &shape : node_aux_shapes[i]) { + std::vector head_nodes; + for (const auto& head : heads) { + head_nodes.push_back(head.source_id); + } + std::sort(head_nodes.begin(), head_nodes.end()); + head_nodes.resize(std::unique(head_nodes.begin(), head_nodes.end()) - head_nodes.begin()); + std::vector fwd_nodes = PostDFSOrder(head_nodes, {}); + uint32_t counter = 0; + for (uint32_t nid : fwd_nodes) { + // backward consistentcy check. 
+ CHECK(nid == counter++); + if (node_aux_shapes[nid].size() > 0) { + for (auto const &shape : node_aux_shapes[nid]) { aux_shape->push_back(shape); } } @@ -218,11 +234,50 @@ StaticGraph::Node StaticGraph::CreateCopyNode(const DataEntry &source) { } void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, - std::vector *arg_grads) { - arg_grads->clear(); - head_grad_nodes->clear(); + std::vector* arg_grads, + std::map* out_mirror_map) { // get topo order of nodes, before new nodes are added std::vector topo_order = TopoSort(); + + // build a mirror map, experimental + std::map& mirror_map = *out_mirror_map; + mirror_map.clear(); + int do_mirror = dmlc::GetEnv("MXNET_BACKWARD_DO_MIRROR", 0); + int mirror_step = dmlc::GetEnv("MXNET_BACKWARD_MIRROR_STEP", 100); + int counter = 0; + + auto need_mirror = [this, do_mirror, &counter, mirror_step](uint32_t nid) { + if (do_mirror == 0) return false; + if (!nodes[nid].is_forward()) return false; + std::string type = nodes[nid].op->TypeString(); + if (type == "Convolution") return false; + if (type == "FullyConnected") return false; + if (type == "Dropout") return false; + if (type == "Concat") return false; + if (type == "SoftmaxOutput") return false; + counter = counter + 1; + if (counter % mirror_step == 0) return false; + return true; + }; + + for (uint32_t nid : topo_order) { + if (need_mirror(nid)) { + uint32_t dup_node_id = static_cast(nodes.size()); + Node node(nodes[nid]); + node.name += "_mirror"; + for (DataEntry& e : node.inputs) { + e.source_id = mirror_map.at(e.source_id); + } + nodes.push_back(std::move(node)); + mirror_map[nid] = dup_node_id; + } else { + mirror_map[nid] = nid; + } + } + + // normal gradient + arg_grads->clear(); + head_grad_nodes->clear(); // map out_data entry to out_grad std::map > grad_map; // allocate head gradient nodes @@ -260,11 +315,12 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, int ntotal = nodes[nid].op->NumOutputs(); // check all outpus for (int i = 0; i < ntotal; ++i) { - DataEntry odata(nid, static_cast(i)); + DataEntry odata(mirror_map[nid], static_cast(i)); + DataEntry okey(nid, static_cast(i)); out_data.push_back(odata); if (i >= nvisible) continue; // get out_grad - auto it = grad_map.find(odata); + auto it = grad_map.find(okey); CHECK(it != grad_map.end()) << "bad graph"; std::vector &gnodes = it->second; if (gnodes.size() == 1) { @@ -283,9 +339,15 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, Node grad_node; // Point to the corresponding source grad_node.backward_source_id = nid; + + std::vector source_inputs; + for (const DataEntry& e : nodes[nid].inputs) { + source_inputs.push_back(DataEntry(mirror_map[e.source_id], e.index)); + } // select out the dependent inputs grad_node.inputs = nodes[nid].op->BackwardInputs( - out_grad, nodes[nid].inputs, out_data); + out_grad, source_inputs, out_data); + grad_node.name = nodes[nid].name + "_backward"; uint32_t grad_node_id = static_cast(nodes.size()); nodes.push_back(std::move(grad_node)); @@ -319,6 +381,7 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, arg_grads->at(i) = DataEntry(agg_node_id, 0); } } + LOG(INFO) << "FINSIHED"; } void StaticGraph::Node::Save(dmlc::JSONWriter *writer) const { diff --git a/src/symbol/static_graph.h b/src/symbol/static_graph.h index 661c924dd03b..db93602b80bf 100644 --- a/src/symbol/static_graph.h +++ b/src/symbol/static_graph.h @@ -179,9 +179,11 @@ class StaticGraph { * \brief Get a post DFS order traversal order from the head nodes. 
* Post DFS order is a special case of Topological order. * \param heads The head of the node. + * \param banned The banned map, used to ban some nodes from the graph. * \return a post DFS visit order of nodes that can reach heads. */ - std::vector PostDFSOrder(const std::vector& head_nodes) const; + std::vector PostDFSOrder(const std::vector& head_nodes, + const std::unordered_set& banned = {}) const; /*! * \brief infer the node shapes in the computation graph. * @@ -226,10 +228,11 @@ class StaticGraph { * * \param head_grad_nodes used to store the created head gradient inputs for backward pass. * \param arg_grads used to store gradients to args, can be multiple one if an argument is used by operator + * \param out_mirror_map The mirror map of the backward plan. */ void MakeBackwardPass(std::vector *head_grad_nodes, - std::vector *arg_grads); - + std::vector *arg_grads, + std::map* out_mirror_map); /*! * \brief Convert symbol into static graph. * \param symbol the symbol to convert from. diff --git a/src/symbol/symbol.cc b/src/symbol/symbol.cc index f36ae322831d..e451c246c350 100644 --- a/src/symbol/symbol.cc +++ b/src/symbol/symbol.cc @@ -494,7 +494,10 @@ Symbol Symbol::Grad(const std::vector& wrt) const { uint32_t num_nodes = g.nodes.size(); std::vector head_grad_nodes; std::vector arg_grads; - g.MakeBackwardPass(&head_grad_nodes, &arg_grads); + // mirror is need to be disabled here. + std::map mirror; + g.MakeBackwardPass(&head_grad_nodes, &arg_grads, &mirror); + std::vector > shared_node; this->DFSVisit([&shared_node](const std::shared_ptr &n) { shared_node.push_back(n); From e395eb0e62f05410892b489428a52cb34e2c9867 Mon Sep 17 00:00:00 2001 From: Lowik CHANUSSOT Date: Sun, 13 Dec 2015 10:48:08 +0100 Subject: [PATCH 03/27] Handle grayscale images. * Add a color parameter in the tool im2rec * Decode rec dataset without forcing color mode. * Substract mean on 1 channel only for grayscale image --- src/io/iter_image_recordio.cc | 27 +++++++++++++++++++-------- src/io/iter_normalize.h | 12 ++++++++---- tools/im2rec.cc | 8 +++++++- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/io/iter_image_recordio.cc b/src/io/iter_image_recordio.cc index 6ca610e8a410..1da7f71c0fd6 100644 --- a/src/io/iter_image_recordio.cc +++ b/src/io/iter_image_recordio.cc @@ -245,19 +245,30 @@ ParseNext(std::vector *out_vec) { cv::Mat res; rec.Load(blob.dptr, blob.size); cv::Mat buf(1, rec.content_size, CV_8U, rec.content); - res = cv::imdecode(buf, 1); + // -1 to keep the number of channel of the encoded image, and not force gray or color. + res = cv::imdecode(buf, -1); + int n_channels = res.channels(); res = augmenters_[tid]->Process(res, prnds_[tid]); out.Push(static_cast(rec.image_index()), - mshadow::Shape3(3, res.rows, res.cols), + mshadow::Shape3(n_channels, res.rows, res.cols), mshadow::Shape1(param_.label_width)); mshadow::Tensor data = out.data().Back(); - for (int i = 0; i < res.rows; ++i) { - for (int j = 0; j < res.cols; ++j) { - cv::Vec3b bgr = res.at(i, j); - data[0][i][j] = bgr[2]; - data[1][i][j] = bgr[1]; - data[2][i][j] = bgr[0]; + // Substract mean value on each channel. 
+ if (n_channels == 3) { + for (int i = 0; i < res.rows; ++i) { + for (int j = 0; j < res.cols; ++j) { + cv::Vec3b bgr = res.at(i, j); + data[0][i][j] = bgr[2]; + data[1][i][j] = bgr[1]; + data[2][i][j] = bgr[0]; + } + } + } else { + for (int i = 0; i < res.rows; ++i) { + for (int j = 0; j < res.cols; ++j) { + data[0][i][j] = res.at(i, j); + } } } mshadow::Tensor label = out.label().Back(); diff --git a/src/io/iter_normalize.h b/src/io/iter_normalize.h index 22fcdc71cdb9..ad91ba691254 100644 --- a/src/io/iter_normalize.h +++ b/src/io/iter_normalize.h @@ -176,10 +176,14 @@ class ImageNormalizeIter : public IIterator { rand_uniform(rnd_) * param_.max_random_illumination * 2 - param_.max_random_illumination; if (param_.mean_r > 0.0f || param_.mean_g > 0.0f || param_.mean_b > 0.0f) { - // substract mean value - data[0] -= param_.mean_r; - data[1] -= param_.mean_g; - data[2] -= param_.mean_b; + // If the input has 3 channels, we substract the mean value on each + if (data.shape_[0] == 3) { + data[0] -= param_.mean_r; + data[1] -= param_.mean_g; + data[2] -= param_.mean_b; + } else { + data[0] -= param_.mean_r; + } if ((param_.rand_mirror && coin_flip(rnd_)) || param_.mirror) { outimg_ = mirror(data * contrast + illumination) * param_.scale; } else { diff --git a/tools/im2rec.cc b/tools/im2rec.cc index 171f1c62280c..f7472ff628b8 100644 --- a/tools/im2rec.cc +++ b/tools/im2rec.cc @@ -25,6 +25,7 @@ int main(int argc, char *argv[]) { if (argc < 4) { printf("Usage: [additional parameters in form key=value]\n"\ "Possible additional parameters:\n"\ + "\tcolor=USE_COLOR[default=1] Use color (1) or gray image (0)\n"\ "\tresize=newsize resize the shorter edge of image to the newsize, original images will be packed by default\n"\ "\tlabel_width=WIDTH[default=1] specify the label_width in the list, by default set to 1\n"\ "\tnsplit=NSPLIT[default=1] used for part generation, logically split the image.list to NSPLIT parts by position\n"\ @@ -39,6 +40,7 @@ int main(int argc, char *argv[]) { int partid = 0; int center_crop = 0; int quality = 80; + int color_mode = CV_LOAD_IMAGE_COLOR; for (int i = 4; i < argc; ++i) { char key[128], val[128]; if (sscanf(argv[i], "%[^=]=%s", key, val) == 2) { @@ -48,6 +50,7 @@ int main(int argc, char *argv[]) { if (!strcmp(key, "part")) partid = atoi(val); if (!strcmp(key, "center_crop")) center_crop = atoi(val); if (!strcmp(key, "quality")) quality = atoi(val); + if (!strcmp(key, "color")) color_mode = atoi(val); } } if (new_size > 0) { @@ -58,6 +61,9 @@ int main(int argc, char *argv[]) { if (center_crop) { LOG(INFO) << "Center cropping to square"; } + if (color_mode == 0) { + LOG(INFO) << "Use gray images"; + } using namespace dmlc; const static size_t kBufferSize = 1 << 20UL; @@ -119,7 +125,7 @@ int main(int argc, char *argv[]) { } delete fi; if (new_size > 0) { - cv::Mat img = cv::imdecode(decode_buf, CV_LOAD_IMAGE_COLOR); + cv::Mat img = cv::imdecode(decode_buf, color_mode); CHECK(img.data != NULL) << "OpenCV decode fail:" << path; if (center_crop) { if (img.rows > img.cols) { From bb18f4797c4949134662b30ecc44603cb7028d2b Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 18 Dec 2015 03:20:03 -0500 Subject: [PATCH 04/27] [EXECUTOR] mirror fix of mirror direction --- src/symbol/graph_executor.cc | 6 ++++-- src/symbol/static_graph.cc | 17 +++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/symbol/graph_executor.cc b/src/symbol/graph_executor.cc index 6a412a318d35..0134135c58e5 100644 --- a/src/symbol/graph_executor.cc +++ 
b/src/symbol/graph_executor.cc @@ -286,6 +286,7 @@ void GraphExecutor::InitGraph(const Symbol &symbol, } } } + // assign context, this will change the graph. std::vector ctx_assignment; this->AssignContext(default_ctx, ctx_map, @@ -326,7 +327,6 @@ void GraphExecutor::InitGraph(const Symbol &symbol, for (uint32_t nid : topo) { if (finished.count(nid) == 0) topo_order_.push_back(nid); } - // setup all the operator nodes data structure op_nodes_.resize(graph_.nodes.size()); for (size_t i = 0; i < graph_.nodes.size(); ++i) { @@ -509,6 +509,9 @@ void GraphExecutor::InitDataEntryInfo(const std::vector &in_args, op_nodes_[e.source_id].activated = true; } } + if (graph_.nodes[nid].is_backward()) { + op_nodes_[graph_.nodes[nid].backward_source_id].activated = true; + } } // shape inference std::vector > out_shapes(op_nodes_.size()); @@ -526,7 +529,6 @@ void GraphExecutor::InitDataEntryInfo(const std::vector &in_args, op_nodes_[i].outputs[j].shape = out_shapes[i][j]; } } - // bind aux args size_t aux_ndarray_idx = 0; for (auto i : topo_order_) { diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index ca40a007c64a..c2f046d86093 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -245,8 +245,9 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, int do_mirror = dmlc::GetEnv("MXNET_BACKWARD_DO_MIRROR", 0); int mirror_step = dmlc::GetEnv("MXNET_BACKWARD_MIRROR_STEP", 100); int counter = 0; + int *pcounter = &counter; - auto need_mirror = [this, do_mirror, &counter, mirror_step](uint32_t nid) { + auto need_mirror = [this, do_mirror, pcounter, mirror_step](uint32_t nid) { if (do_mirror == 0) return false; if (!nodes[nid].is_forward()) return false; std::string type = nodes[nid].op->TypeString(); @@ -255,8 +256,8 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, if (type == "Dropout") return false; if (type == "Concat") return false; if (type == "SoftmaxOutput") return false; - counter = counter + 1; - if (counter % mirror_step == 0) return false; + ++pcounter[0]; + if (pcounter[0] % mirror_step == 0) return false; return true; }; @@ -304,6 +305,7 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, // do backward pass traverse for (auto it = topo_order.rbegin(); it != topo_order.rend(); ++it) { uint32_t nid = *it; + uint32_t mirror_nid = mirror_map[nid]; // skip variables if (nodes[nid].is_variable()) continue; CHECK(nodes[nid].is_forward()) << "Do not support Backward of Backward"; @@ -315,7 +317,7 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, int ntotal = nodes[nid].op->NumOutputs(); // check all outpus for (int i = 0; i < ntotal; ++i) { - DataEntry odata(mirror_map[nid], static_cast(i)); + DataEntry odata(mirror_nid, static_cast(i)); DataEntry okey(nid, static_cast(i)); out_data.push_back(odata); if (i >= nvisible) continue; @@ -338,17 +340,17 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, // Create a gradient backward node Node grad_node; // Point to the corresponding source - grad_node.backward_source_id = nid; + grad_node.backward_source_id = mirror_nid; std::vector source_inputs; for (const DataEntry& e : nodes[nid].inputs) { source_inputs.push_back(DataEntry(mirror_map[e.source_id], e.index)); } // select out the dependent inputs - grad_node.inputs = nodes[nid].op->BackwardInputs( + grad_node.inputs = nodes[mirror_nid].op->BackwardInputs( out_grad, source_inputs, out_data); - grad_node.name = nodes[nid].name + "_backward"; + grad_node.name = nodes[mirror_nid].name + 
"_backward"; uint32_t grad_node_id = static_cast(nodes.size()); nodes.push_back(std::move(grad_node)); // update gradient map @@ -381,7 +383,6 @@ void StaticGraph::MakeBackwardPass(std::vector *head_grad_nodes, arg_grads->at(i) = DataEntry(agg_node_id, 0); } } - LOG(INFO) << "FINSIHED"; } void StaticGraph::Node::Save(dmlc::JSONWriter *writer) const { From aefe7343952188b62114ef4fc0da0ec9a36697e7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 18 Dec 2015 03:20:58 -0500 Subject: [PATCH 05/27] [OP] Fix BatchNorm when requirement is not all writeto, more memory efficient BN --- src/operator/batch_norm-inl.h | 29 ++++++++++++++--------------- src/operator/fully_connected-inl.h | 1 + 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h index 853ac4cb3880..e3fafe0feb76 100644 --- a/src/operator/batch_norm-inl.h +++ b/src/operator/batch_norm-inl.h @@ -94,11 +94,14 @@ class BatchNormOp : public Operator { if (ctx.is_train) { Tensor mean = out_data[batchnorm::kMean].get(s); Tensor var = out_data[batchnorm::kVar].get(s); - Assign(mean, req[batchnorm::kMean], scale * sumall_except_dim<1>(data)); - Assign(var, req[batchnorm::kVar], scale * sumall_except_dim<1>( - F(data - broadcast<1>(mean, data.shape_)))); - Assign(out_no_affine, req[batchnorm::kOutNoAffine], (data - broadcast<1>(mean, data.shape_)) / - F(broadcast<1>(var + param_.eps, data.shape_))); + CHECK(req[batchnorm::kOutNoAffine] == kNullOp || req[batchnorm::kOutNoAffine] == kWriteTo); + CHECK(req[batchnorm::kMean] == kNullOp || req[batchnorm::kMean] == kWriteTo); + CHECK(req[batchnorm::kVar] == kNullOp || req[batchnorm::kVar] == kWriteTo); + // The first three steps must be enforced. + mean = scale * sumall_except_dim<1>(data); + var = scale * sumall_except_dim<1>(F(data - broadcast<1>(mean, data.shape_))); + out_no_affine = (data - broadcast<1>(mean, data.shape_)) / + F(broadcast<1>(var + param_.eps, data.shape_)); Assign(out, req[batchnorm::kOut], out_no_affine * broadcast<1>(slope, out.shape_) + broadcast<1>(bias, out.shape_)); } else { @@ -125,22 +128,20 @@ class BatchNormOp : public Operator { CHECK_EQ(in_grad.size(), 3); Stream *s = ctx.get_stream(); Tensor data, grad, grad_in; - Tensor out, out_no_affine; - const real_t scale = static_cast(out_data[batchnorm::kOut].shape_[1]) / - static_cast(out_data[batchnorm::kOut].shape_.Size()); + Tensor out_no_affine; + const real_t scale = static_cast(out_grad[batchnorm::kOut].shape_[1]) / + static_cast(out_grad[batchnorm::kOut].shape_.Size()); if (in_data[batchnorm::kData].ndim() == 2) { - Shape<4> dshape = Shape4(out_data[batchnorm::kOut].shape_[0], - out_data[batchnorm::kOut].shape_[1], 1, 1); + Shape<4> dshape = Shape4(out_grad[batchnorm::kOut].shape_[0], + out_grad[batchnorm::kOut].shape_[1], 1, 1); data = in_data[batchnorm::kData].get_with_shape(dshape, s); grad = out_grad[batchnorm::kOut].get_with_shape(dshape, s); grad_in = in_grad[batchnorm::kData].get_with_shape(dshape, s); - out = out_data[batchnorm::kOut].get_with_shape(dshape, s); out_no_affine = out_data[batchnorm::kOutNoAffine].get_with_shape(dshape, s); } else { data = in_data[batchnorm::kData].get(s); grad = out_grad[batchnorm::kOut].get(s); grad_in = in_grad[batchnorm::kData].get(s); - out = out_data[batchnorm::kOut].get(s); out_no_affine = out_data[batchnorm::kOutNoAffine].get(s); } @@ -152,7 +153,7 @@ class BatchNormOp : public Operator { Tensor gbias = in_grad[batchnorm::kBeta].get(s); // get requested temp space Tensor workspace = 
ctx.requested[batchnorm::kTempSpace].get_space( - mshadow::Shape2(3, out.shape_[1]), s); + mshadow::Shape2(3, mean.shape_[0]), s); Tensor gmean = workspace[0]; Tensor gvar = workspace[1]; Tensor tmp = workspace[2]; @@ -239,7 +240,6 @@ class BatchNormProp : public OperatorProperty { const std::vector &in_data, const std::vector &out_data) const override { return {out_grad[batchnorm::kOut], - out_data[batchnorm::kOut], out_data[batchnorm::kOutNoAffine], out_data[batchnorm::kMean], out_data[batchnorm::kVar], @@ -292,4 +292,3 @@ class BatchNormProp : public OperatorProperty { } // namespace op } // namespace mxnet #endif // MXNET_OPERATOR_BATCH_NORM_INL_H_ - diff --git a/src/operator/fully_connected-inl.h b/src/operator/fully_connected-inl.h index 3454c3498cf9..15798426d692 100644 --- a/src/operator/fully_connected-inl.h +++ b/src/operator/fully_connected-inl.h @@ -57,6 +57,7 @@ class FullyConnectedOp : public Operator { const std::vector &aux_args) { using namespace mshadow; using namespace mshadow::expr; + if (req[fullc::kOut] == kNullOp) return; CHECK_EQ(req[fullc::kOut], kWriteTo); size_t expected = param_.no_bias ? 2 : 3; CHECK_EQ(in_data.size(), expected); From e3c3473ff0ebe7afda8ad22eb08aa9d3b8f3d2b0 Mon Sep 17 00:00:00 2001 From: Chuntao Hong Date: Fri, 18 Dec 2015 17:27:42 +0800 Subject: [PATCH 06/27] fix nvrtc problem on windows --- CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7846f9a9c6a0..bced5a6215b9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,7 +91,14 @@ endif() if(USE_CUDA) # define preprocessor macro so that we will not include the generated forcelink header mshadow_cuda_compile(cuda_objs ${cuda}) - list(APPEND mshadow_LINKER_LIBS nvrtc cuda) + if(MSVC) + FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32") + list(APPEND mshadow_LINKER_LIBS ${CUDA_nvrtc_LIBRARY}) + set(CUDA_cuda_LIBRARY "${CUDA_nvrtc_LIBRARY}/../cuda.lib") + list(APPEND mshadow_LINKER_LIBS ${CUDA_cuda_LIBRARY}) + else(MSVC) + list(APPEND mshadow_LINKER_LIBS nvrtc cuda) + endif() list(APPEND SOURCE ${cuda_objs} ${cuda}) endif() From 510ca838e613e0263fd00d9f53293b6fd5b94263 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 18 Dec 2015 03:40:49 -0500 Subject: [PATCH 07/27] [EXEC] fix g++ 4.9 --- src/operator/batch_norm-inl.h | 3 ++- src/symbol/static_graph.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h index e3fafe0feb76..4003bc50378a 100644 --- a/src/operator/batch_norm-inl.h +++ b/src/operator/batch_norm-inl.h @@ -99,7 +99,8 @@ class BatchNormOp : public Operator { CHECK(req[batchnorm::kVar] == kNullOp || req[batchnorm::kVar] == kWriteTo); // The first three steps must be enforced. 
mean = scale * sumall_except_dim<1>(data); - var = scale * sumall_except_dim<1>(F(data - broadcast<1>(mean, data.shape_))); + var = scale * sumall_except_dim<1>(F( + data - broadcast<1>(mean, data.shape_))); out_no_affine = (data - broadcast<1>(mean, data.shape_)) / F(broadcast<1>(var + param_.eps, data.shape_)); Assign(out, req[batchnorm::kOut], out_no_affine * broadcast<1>(slope, out.shape_) + diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index c2f046d86093..d2ac61361a9a 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -66,7 +66,7 @@ std::vector StaticGraph::TopoSort() const { head_nodes.push_back(static_cast(i)); } } - return PostDFSOrder(head_nodes, {}); + return PostDFSOrder(head_nodes, std::unordered_set()); } bool StaticGraph::InferNodeShapes(const std::vector &topo_order, @@ -199,7 +199,7 @@ bool StaticGraph::InferShape(std::vector *in_shape, } std::sort(head_nodes.begin(), head_nodes.end()); head_nodes.resize(std::unique(head_nodes.begin(), head_nodes.end()) - head_nodes.begin()); - std::vector fwd_nodes = PostDFSOrder(head_nodes, {}); + std::vector fwd_nodes = PostDFSOrder(head_nodes, std::unordered_set()); uint32_t counter = 0; for (uint32_t nid : fwd_nodes) { // backward consistentcy check. From 44853630c4d69e3822f1b68605c957ec2b27d610 Mon Sep 17 00:00:00 2001 From: yajiedesign Date: Sat, 19 Dec 2015 11:21:03 +0800 Subject: [PATCH 08/27] add cudnn v4 with CUDNN_MAJOR auto switch v3/v4 --- src/common/cuda_utils.h | 95 +++++++++++++------------- src/operator/cudnn_convolution-inl.h | 16 ++++- src/operator/cudnn_deconvolution-inl.h | 67 ++++++++++-------- 3 files changed, 101 insertions(+), 77 deletions(-) diff --git a/src/common/cuda_utils.h b/src/common/cuda_utils.h index bbc7961c2642..839ee0ec26fa 100644 --- a/src/common/cuda_utils.h +++ b/src/common/cuda_utils.h @@ -25,26 +25,26 @@ namespace cuda { */ inline const char* CublasGetErrorString(cublasStatus_t error) { switch (error) { - case CUBLAS_STATUS_SUCCESS: - return "CUBLAS_STATUS_SUCCESS"; - case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; - case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; - case CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; - case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; - case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; - case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; - case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; - case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; - default: - break; + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + default: + break; } return "Unknown cuBLAS status"; } @@ -56,32 +56,32 @@ inline const char* CublasGetErrorString(cublasStatus_t error) { */ inline const char* 
CurandGetErrorString(curandStatus_t status) { switch (status) { - case CURAND_STATUS_SUCCESS: - return "CURAND_STATUS_SUCCESS"; - case CURAND_STATUS_VERSION_MISMATCH: - return "CURAND_STATUS_VERSION_MISMATCH"; - case CURAND_STATUS_NOT_INITIALIZED: - return "CURAND_STATUS_NOT_INITIALIZED"; - case CURAND_STATUS_ALLOCATION_FAILED: - return "CURAND_STATUS_ALLOCATION_FAILED"; - case CURAND_STATUS_TYPE_ERROR: - return "CURAND_STATUS_TYPE_ERROR"; - case CURAND_STATUS_OUT_OF_RANGE: - return "CURAND_STATUS_OUT_OF_RANGE"; - case CURAND_STATUS_LENGTH_NOT_MULTIPLE: - return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; - case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; - case CURAND_STATUS_LAUNCH_FAILURE: - return "CURAND_STATUS_LAUNCH_FAILURE"; - case CURAND_STATUS_PREEXISTING_FAILURE: - return "CURAND_STATUS_PREEXISTING_FAILURE"; - case CURAND_STATUS_INITIALIZATION_FAILED: - return "CURAND_STATUS_INITIALIZATION_FAILED"; - case CURAND_STATUS_ARCH_MISMATCH: - return "CURAND_STATUS_ARCH_MISMATCH"; - case CURAND_STATUS_INTERNAL_ERROR: - return "CURAND_STATUS_INTERNAL_ERROR"; + case CURAND_STATUS_SUCCESS: + return "CURAND_STATUS_SUCCESS"; + case CURAND_STATUS_VERSION_MISMATCH: + return "CURAND_STATUS_VERSION_MISMATCH"; + case CURAND_STATUS_NOT_INITIALIZED: + return "CURAND_STATUS_NOT_INITIALIZED"; + case CURAND_STATUS_ALLOCATION_FAILED: + return "CURAND_STATUS_ALLOCATION_FAILED"; + case CURAND_STATUS_TYPE_ERROR: + return "CURAND_STATUS_TYPE_ERROR"; + case CURAND_STATUS_OUT_OF_RANGE: + return "CURAND_STATUS_OUT_OF_RANGE"; + case CURAND_STATUS_LENGTH_NOT_MULTIPLE: + return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; + case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: + return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; + case CURAND_STATUS_LAUNCH_FAILURE: + return "CURAND_STATUS_LAUNCH_FAILURE"; + case CURAND_STATUS_PREEXISTING_FAILURE: + return "CURAND_STATUS_PREEXISTING_FAILURE"; + case CURAND_STATUS_INITIALIZATION_FAILED: + return "CURAND_STATUS_INITIALIZATION_FAILED"; + case CURAND_STATUS_ARCH_MISMATCH: + return "CURAND_STATUS_ARCH_MISMATCH"; + case CURAND_STATUS_INTERNAL_ERROR: + return "CURAND_STATUS_INTERNAL_ERROR"; } return "Unknown cuRAND status"; } @@ -151,5 +151,8 @@ inline const char* CurandGetErrorString(curandStatus_t status) { CHECK_EQ(e, CUDNN_STATUS_SUCCESS) << "cuDNN: " << cudnnGetErrorString(e); \ } +#define CUDNN_VERSION_EQUAL(major, minor) \ + ((CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100) == (major * 1000 + minor * 100)) + #endif // MXNET_USE_CUDNN #endif // MXNET_COMMON_CUDA_UTILS_H_ diff --git a/src/operator/cudnn_convolution-inl.h b/src/operator/cudnn_convolution-inl.h index f2efa356f491..5187909abfa0 100644 --- a/src/operator/cudnn_convolution-inl.h +++ b/src/operator/cudnn_convolution-inl.h @@ -10,7 +10,6 @@ #include #include #include "./convolution-inl.h" - namespace mxnet { namespace op { #if defined(__CUDACC__) && MXNET_USE_CUDNN == 1 @@ -57,7 +56,7 @@ class CuDNNConvolutionOp : public Operator { Init(s, in_data, out_data); } Tensor workspace = ctx.requested[conv::kTempSpace].get_space( - mshadow::Shape1(forward_workspace_), s); + mshadow::Shape1(forward_workspace_), s); CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_, &alpha, in_desc_, @@ -74,6 +73,16 @@ class CuDNNConvolutionOp : public Operator { if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[conv::kBias].get(s); +#if CUDNN_VERSION_EQUAL(4,0) + CHECK_EQ(cudnnAddTensor(s->dnn_handle_, + &alpha, + bias_desc_, + bias.dptr_, + &beta, + out_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); +#endif 
+#if CUDNN_VERSION_EQUAL(3,0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, @@ -82,6 +91,7 @@ class CuDNNConvolutionOp : public Operator { &beta, out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); +#endif } } @@ -108,7 +118,7 @@ class CuDNNConvolutionOp : public Operator { Tensor data = in_data[conv::kData].get(s); Tensor gdata = in_grad[conv::kData].get(s); Tensor workspace = ctx.requested[conv::kTempSpace].get_space( - mshadow::Shape1(backward_workspace_), s); + mshadow::Shape1(backward_workspace_), s); if (!param_.no_bias) { Tensor gbias = in_grad[conv::kBias].get(s); CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_, diff --git a/src/operator/cudnn_deconvolution-inl.h b/src/operator/cudnn_deconvolution-inl.h index 61652b70ac6e..63e57c29fb36 100644 --- a/src/operator/cudnn_deconvolution-inl.h +++ b/src/operator/cudnn_deconvolution-inl.h @@ -57,23 +57,33 @@ class CuDNNDeconvolutionOp : public Operator { Init(s, in_data, out_data); } Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( - mshadow::Shape1(forward_workspace_), s); - CHECK_EQ(cudnnConvolutionBackwardData_v3(s->dnn_handle_, - &alpha, - filter_desc_, - wmat.dptr_, - in_desc_, - data.dptr_, - conv_desc_, - back_algo_, - workspace.dptr_, - backward_workspace_byte_, - &beta, - out_desc_, - out.dptr_), CUDNN_STATUS_SUCCESS); + mshadow::Shape1(forward_workspace_), s); + CHECK_EQ(cudnnConvolutionBackwardData_v3(s->dnn_handle_, + &alpha, + filter_desc_, + wmat.dptr_, + in_desc_, + data.dptr_, + conv_desc_, + back_algo_, + workspace.dptr_, + backward_workspace_byte_, + &beta, + out_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[deconv::kBias].get(s); +#if CUDNN_VERSION_EQUAL(4,0) + CHECK_EQ(cudnnAddTensor(s->dnn_handle_, + &alpha, + bias_desc_, + bias.dptr_, + &beta, + out_desc_, + out.dptr_), CUDNN_STATUS_SUCCESS); +#endif +#if CUDNN_VERSION_EQUAL(3,0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, @@ -82,6 +92,7 @@ class CuDNNDeconvolutionOp : public Operator { &beta, out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); +#endif } } @@ -108,7 +119,7 @@ class CuDNNDeconvolutionOp : public Operator { Tensor data = in_data[deconv::kData].get(s); Tensor gdata = in_grad[deconv::kData].get(s); Tensor workspace = ctx.requested[deconv::kTempSpace].get_space( - mshadow::Shape1(backward_workspace_), s); + mshadow::Shape1(backward_workspace_), s); if (!param_.no_bias) { Tensor gbias = in_grad[deconv::kBias].get(s); CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_, @@ -132,19 +143,19 @@ class CuDNNDeconvolutionOp : public Operator { &beta, filter_desc_, gwmat.dptr_), CUDNN_STATUS_SUCCESS); - CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_, - &alpha, - out_desc_, - grad.dptr_, - filter_desc_, - wmat.dptr_, - conv_desc_, - algo_, - workspace.dptr_, - forward_workspace_byte_, - &beta, - in_desc_, - gdata.dptr_), CUDNN_STATUS_SUCCESS); + CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_, + &alpha, + out_desc_, + grad.dptr_, + filter_desc_, + wmat.dptr_, + conv_desc_, + algo_, + workspace.dptr_, + forward_workspace_byte_, + &beta, + in_desc_, + gdata.dptr_), CUDNN_STATUS_SUCCESS); } private: From 821bf3e2e69b3477aa8a0ad70ecca79928036d5d Mon Sep 17 00:00:00 2001 From: yajiedesign Date: Sat, 19 Dec 2015 11:29:11 +0800 Subject: [PATCH 09/27] fix lint --- src/operator/cudnn_convolution-inl.h | 4 ++-- src/operator/cudnn_deconvolution-inl.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/operator/cudnn_convolution-inl.h 
b/src/operator/cudnn_convolution-inl.h index 5187909abfa0..06e124d10b36 100644 --- a/src/operator/cudnn_convolution-inl.h +++ b/src/operator/cudnn_convolution-inl.h @@ -73,7 +73,7 @@ class CuDNNConvolutionOp : public Operator { if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[conv::kBias].get(s); -#if CUDNN_VERSION_EQUAL(4,0) +#if CUDNN_VERSION_EQUAL(4, 0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, &alpha, bias_desc_, @@ -82,7 +82,7 @@ class CuDNNConvolutionOp : public Operator { out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); #endif -#if CUDNN_VERSION_EQUAL(3,0) +#if CUDNN_VERSION_EQUAL(3, 0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, diff --git a/src/operator/cudnn_deconvolution-inl.h b/src/operator/cudnn_deconvolution-inl.h index 63e57c29fb36..b1b701bcde8c 100644 --- a/src/operator/cudnn_deconvolution-inl.h +++ b/src/operator/cudnn_deconvolution-inl.h @@ -74,7 +74,7 @@ class CuDNNDeconvolutionOp : public Operator { if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[deconv::kBias].get(s); -#if CUDNN_VERSION_EQUAL(4,0) +#if CUDNN_VERSION_EQUAL(4, 0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, &alpha, bias_desc_, @@ -83,7 +83,7 @@ class CuDNNDeconvolutionOp : public Operator { out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); #endif -#if CUDNN_VERSION_EQUAL(3,0) +#if CUDNN_VERSION_EQUAL(3, 0) CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, From 15d7af251f53eb6434c289910d396a3c77bde2f0 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 19 Dec 2015 09:33:41 -0700 Subject: [PATCH 10/27] fix cudnn selection --- src/common/cuda_utils.h | 3 --- src/operator/cudnn_convolution-inl.h | 4 ++-- src/operator/cudnn_deconvolution-inl.h | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/common/cuda_utils.h b/src/common/cuda_utils.h index 839ee0ec26fa..97570d4405d5 100644 --- a/src/common/cuda_utils.h +++ b/src/common/cuda_utils.h @@ -151,8 +151,5 @@ inline const char* CurandGetErrorString(curandStatus_t status) { CHECK_EQ(e, CUDNN_STATUS_SUCCESS) << "cuDNN: " << cudnnGetErrorString(e); \ } -#define CUDNN_VERSION_EQUAL(major, minor) \ - ((CUDNN_MAJOR * 1000 + CUDNN_MINOR * 100) == (major * 1000 + minor * 100)) - #endif // MXNET_USE_CUDNN #endif // MXNET_COMMON_CUDA_UTILS_H_ diff --git a/src/operator/cudnn_convolution-inl.h b/src/operator/cudnn_convolution-inl.h index 06e124d10b36..9dc5a4e89636 100644 --- a/src/operator/cudnn_convolution-inl.h +++ b/src/operator/cudnn_convolution-inl.h @@ -73,7 +73,7 @@ class CuDNNConvolutionOp : public Operator { if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[conv::kBias].get(s); -#if CUDNN_VERSION_EQUAL(4, 0) +#if CUDNN_MAJOR == 4 CHECK_EQ(cudnnAddTensor(s->dnn_handle_, &alpha, bias_desc_, @@ -82,7 +82,7 @@ class CuDNNConvolutionOp : public Operator { out_desc_, out.dptr_), CUDNN_STATUS_SUCCESS); #endif -#if CUDNN_VERSION_EQUAL(3, 0) +#if CUDNN_MAJOR == 3 CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, diff --git a/src/operator/cudnn_deconvolution-inl.h b/src/operator/cudnn_deconvolution-inl.h index b1b701bcde8c..3d8d91105465 100644 --- a/src/operator/cudnn_deconvolution-inl.h +++ b/src/operator/cudnn_deconvolution-inl.h @@ -74,7 +74,7 @@ class CuDNNDeconvolutionOp : public Operator { if (!param_.no_bias) { beta = 1.0f; Tensor bias = in_data[deconv::kBias].get(s); -#if CUDNN_VERSION_EQUAL(4, 0) +#if CUDNN_MAJOR == 4 CHECK_EQ(cudnnAddTensor(s->dnn_handle_, &alpha, bias_desc_, @@ -83,7 +83,7 @@ class CuDNNDeconvolutionOp : public Operator { out_desc_, out.dptr_), 
CUDNN_STATUS_SUCCESS); #endif -#if CUDNN_VERSION_EQUAL(3, 0) +#if CUDNN_MAJOR == 3 CHECK_EQ(cudnnAddTensor(s->dnn_handle_, CUDNN_ADD_SAME_C, &alpha, From 982fa877f2d72d6365dc215234fc62821e80d7a5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 19 Dec 2015 17:09:29 -0800 Subject: [PATCH 11/27] [SYMBOL] Fix multiout check error --- dmlc-core | 2 +- mshadow | 2 +- ps-lite | 2 +- src/symbol/static_graph.cc | 5 ++--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dmlc-core b/dmlc-core index a9b3320d2c6b..c109d083b64a 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit a9b3320d2c6b29506139784b877142c9ee78caaf +Subproject commit c109d083b64abb1f6d8fff07e51086815818264b diff --git a/mshadow b/mshadow index 629d09ff9323..120acae34880 160000 --- a/mshadow +++ b/mshadow @@ -1 +1 @@ -Subproject commit 629d09ff93232f73dbd58d71eba55b24830cfc1e +Subproject commit 120acae3488099d8f27886448cb225aca1c86a31 diff --git a/ps-lite b/ps-lite index d175ec2393c6..1f2e84a86c63 160000 --- a/ps-lite +++ b/ps-lite @@ -1 +1 @@ -Subproject commit d175ec2393c6ab00d5d0a143b42ee6dc6efb7038 +Subproject commit 1f2e84a86c63eb79c4f4c2404851dfa7eb9f4231 diff --git a/src/symbol/static_graph.cc b/src/symbol/static_graph.cc index d2ac61361a9a..5cb46b2be203 100644 --- a/src/symbol/static_graph.cc +++ b/src/symbol/static_graph.cc @@ -20,7 +20,8 @@ std::vector StaticGraph::PostDFSOrder(const std::vector& hea ret.reserve(nodes.size() / 2); std::vector > stack; // heads - for (auto &head : head_nodes) { + for (auto head : head_nodes) { + if (visited.count(head) != 0) continue; stack.push_back(std::make_pair(head, 0)); CHECK_EQ(banned.count(head), 0); // bugfix @@ -197,8 +198,6 @@ bool StaticGraph::InferShape(std::vector *in_shape, for (const auto& head : heads) { head_nodes.push_back(head.source_id); } - std::sort(head_nodes.begin(), head_nodes.end()); - head_nodes.resize(std::unique(head_nodes.begin(), head_nodes.end()) - head_nodes.begin()); std::vector fwd_nodes = PostDFSOrder(head_nodes, std::unordered_set()); uint32_t counter = 0; for (uint32_t nid : fwd_nodes) { From 3e6cc62bf3634975f24117ebdacb12f83aa211b1 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 19 Dec 2015 20:30:20 -0700 Subject: [PATCH 12/27] Update README.md --- example/neural-style/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/neural-style/README.md b/example/neural-style/README.md index 1d850c16eb99..50402cc4de07 100644 --- a/example/neural-style/README.md +++ b/example/neural-style/README.md @@ -14,7 +14,7 @@ Then run `python run.py`, use `-h` to see more options -It takes 50 secs for a GTX 980 to generate the above 600x400 image. +It takes 30 secs for a Titan X to generate the above 600x400 image. 
## Note From 832a198fa03962154d41d0411fb182200c37d671 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sat, 19 Dec 2015 10:16:28 -0700 Subject: [PATCH 13/27] update fix gamma for bn --- src/operator/batch_norm-inl.h | 92 ++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 28 deletions(-) diff --git a/src/operator/batch_norm-inl.h b/src/operator/batch_norm-inl.h index 4003bc50378a..3c30a1e365bc 100644 --- a/src/operator/batch_norm-inl.h +++ b/src/operator/batch_norm-inl.h @@ -22,7 +22,7 @@ namespace op { namespace batchnorm { enum BatchNormOpInputs {kData, kGamma, kBeta}; -enum BatchNormOpOutputs {kOut, kOutNoAffine, kMean, kVar}; +enum BatchNormOpOutputs {kOut, kMean, kVar, kOutNoAffine}; enum BatchNormOpAuxiliary {kMovingMean, kMovingVar}; enum BatchNormBackResource {kTempSpace}; } // namespace batchnorm @@ -55,11 +55,12 @@ class BatchNormOp : public Operator { const std::vector &aux_states) { using namespace mshadow; using namespace mshadow::expr; + const size_t expected_out = this->param_.fix_gamma ? 3 : 4; CHECK_EQ(in_data.size(), 3); CHECK_EQ(aux_states.size(), 2); if (ctx.is_train) { - CHECK_EQ(out_data.size(), 4); - CHECK_EQ(req.size(), 4); + CHECK_EQ(out_data.size(), expected_out); + CHECK_EQ(req.size(), expected_out); } else { CHECK_GE(out_data.size(), 1); CHECK_GE(req.size(), 1); @@ -76,13 +77,14 @@ class BatchNormOp : public Operator { in_data[batchnorm::kData].shape_[1], 1, 1); data = in_data[batchnorm::kData].get_with_shape(dshape, s); out = out_data[batchnorm::kOut].get_with_shape(dshape, s); - if (ctx.is_train) { - out_no_affine = out_data[batchnorm::kOutNoAffine].get_with_shape(dshape, s); + if (ctx.is_train && !param_.fix_gamma) { + out_no_affine = out_data[batchnorm::kOutNoAffine].get_with_shape(dshape, + s); } } else { data = in_data[batchnorm::kData].get(s); out = out_data[batchnorm::kOut].get(s); - if (ctx.is_train) { + if (ctx.is_train && !param_.fix_gamma) { out_no_affine = out_data[batchnorm::kOutNoAffine].get(s); } } @@ -94,17 +96,23 @@ class BatchNormOp : public Operator { if (ctx.is_train) { Tensor mean = out_data[batchnorm::kMean].get(s); Tensor var = out_data[batchnorm::kVar].get(s); - CHECK(req[batchnorm::kOutNoAffine] == kNullOp || req[batchnorm::kOutNoAffine] == kWriteTo); CHECK(req[batchnorm::kMean] == kNullOp || req[batchnorm::kMean] == kWriteTo); CHECK(req[batchnorm::kVar] == kNullOp || req[batchnorm::kVar] == kWriteTo); // The first three steps must be enforced. 
mean = scale * sumall_except_dim<1>(data); var = scale * sumall_except_dim<1>(F( data - broadcast<1>(mean, data.shape_))); - out_no_affine = (data - broadcast<1>(mean, data.shape_)) / - F(broadcast<1>(var + param_.eps, data.shape_)); - Assign(out, req[batchnorm::kOut], out_no_affine * broadcast<1>(slope, out.shape_) + - broadcast<1>(bias, out.shape_)); + if (param_.fix_gamma) { + Assign(out, req[batchnorm::kOut], (data - broadcast<1>(mean, data.shape_)) / + F(broadcast<1>(var + param_.eps, data.shape_)) + + broadcast<1>(bias, out.shape_)); + } else { + CHECK(req[batchnorm::kOutNoAffine] == kNullOp || req[batchnorm::kOutNoAffine] == kWriteTo); + out_no_affine = (data - broadcast<1>(mean, data.shape_)) / + F(broadcast<1>(var + param_.eps, data.shape_)); + Assign(out, req[batchnorm::kOut], out_no_affine * broadcast<1>(slope, out.shape_) + + broadcast<1>(bias, out.shape_)); + } } else { Assign(out, req[batchnorm::kOut], broadcast<1>(slope / F(moving_var + param_.eps), @@ -123,9 +131,10 @@ class BatchNormOp : public Operator { const std::vector &aux_states) { using namespace mshadow; using namespace mshadow::expr; + const size_t expected_out = param_.fix_gamma ? 3 : 4; CHECK_EQ(out_grad.size(), 1); CHECK_EQ(in_data.size(), 3); - CHECK_EQ(out_data.size(), 4); + CHECK_EQ(out_data.size(), expected_out); CHECK_EQ(in_grad.size(), 3); Stream *s = ctx.get_stream(); Tensor data, grad, grad_in; @@ -138,12 +147,17 @@ class BatchNormOp : public Operator { data = in_data[batchnorm::kData].get_with_shape(dshape, s); grad = out_grad[batchnorm::kOut].get_with_shape(dshape, s); grad_in = in_grad[batchnorm::kData].get_with_shape(dshape, s); - out_no_affine = out_data[batchnorm::kOutNoAffine].get_with_shape(dshape, s); + if (!param_.fix_gamma) { + out_no_affine = out_data[batchnorm::kOutNoAffine].get_with_shape(dshape, + s); + } } else { data = in_data[batchnorm::kData].get(s); grad = out_grad[batchnorm::kOut].get(s); grad_in = in_grad[batchnorm::kData].get(s); - out_no_affine = out_data[batchnorm::kOutNoAffine].get(s); + if (!param_.fix_gamma) { + out_no_affine = out_data[batchnorm::kOutNoAffine].get(s); + } } Tensor mean = out_data[batchnorm::kMean].get(s); @@ -178,13 +192,19 @@ class BatchNormOp : public Operator { // assign if (!param_.fix_gamma) { Assign(gslope, req[batchnorm::kGamma], sumall_except_dim<1>(grad * out_no_affine)); - } - Assign(gbias, req[batchnorm::kBeta], sumall_except_dim<1>(grad)); - Assign(grad_in, req[batchnorm::kData], (grad * broadcast<1>(slope, data.shape_)) * + Assign(grad_in, req[batchnorm::kData], (grad * broadcast<1>(slope, data.shape_)) * + broadcast<1>(1.0f / F(var + param_.eps), data.shape_) + + broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean, + data.shape_)) + + broadcast<1>(gmean, data.shape_) * scale); + } else { + Assign(grad_in, req[batchnorm::kData], grad * broadcast<1>(1.0f / F(var + param_.eps), data.shape_) + broadcast<1>(gvar, data.shape_) * scale * 2.0f * (data - broadcast<1>(mean, data.shape_)) + broadcast<1>(gmean, data.shape_) * scale); + } + Assign(gbias, req[batchnorm::kBeta], sumall_except_dim<1>(grad)); } private: @@ -217,9 +237,11 @@ class BatchNormProp : public OperatorProperty { in_shape->at(2) = TShape(Shape1(dshape[1])); out_shape->clear(); out_shape->push_back(dshape); - out_shape->push_back(dshape); out_shape->push_back(Shape1(dshape[1])); out_shape->push_back(Shape1(dshape[1])); + if (!param_.fix_gamma) { + out_shape->push_back(dshape); + } aux_shape->clear(); aux_shape->push_back(Shape1(dshape[1])); 
aux_shape->push_back(Shape1(dshape[1])); @@ -240,14 +262,24 @@ class BatchNormProp : public OperatorProperty { const std::vector &out_grad, const std::vector &in_data, const std::vector &out_data) const override { - return {out_grad[batchnorm::kOut], - out_data[batchnorm::kOutNoAffine], - out_data[batchnorm::kMean], - out_data[batchnorm::kVar], - in_data[batchnorm::kData], - in_data[batchnorm::kGamma], - in_data[batchnorm::kBeta] - }; + if (param_.fix_gamma) { + return {out_grad[batchnorm::kOut], + out_data[batchnorm::kMean], + out_data[batchnorm::kVar], + in_data[batchnorm::kData], + in_data[batchnorm::kGamma], + in_data[batchnorm::kBeta] + }; + } else { + return {out_grad[batchnorm::kOut], + out_data[batchnorm::kOutNoAffine], + out_data[batchnorm::kMean], + out_data[batchnorm::kVar], + in_data[batchnorm::kData], + in_data[batchnorm::kGamma], + in_data[batchnorm::kBeta] + }; + } } std::vector > BackwardInplaceOption( @@ -268,7 +300,7 @@ class BatchNormProp : public OperatorProperty { } int NumOutputs() const override { - return 4; + return param_.fix_gamma ? 3 : 4; } std::vector ListArguments() const override { @@ -276,7 +308,11 @@ class BatchNormProp : public OperatorProperty { } std::vector ListOutputs() const override { - return {"output", "output_no_affine", "mean", "var"}; + if (param_.fix_gamma) { + return {"output", "mean", "var"}; + } else { + return {"output", "mean", "var", "output_no_affine"}; + } } std::vector ListAuxiliaryStates() const override { From ebe1001053d14ccdbc24f2b2e9688163b742b312 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sat, 19 Dec 2015 23:43:36 -0600 Subject: [PATCH 14/27] Add error detector for scala tests on Travis --- tests/travis/error_detector.sh | 8 ++++++++ tests/travis/run_test.sh | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100755 tests/travis/error_detector.sh diff --git a/tests/travis/error_detector.sh b/tests/travis/error_detector.sh new file mode 100755 index 000000000000..658d2b614c48 --- /dev/null +++ b/tests/travis/error_detector.sh @@ -0,0 +1,8 @@ +#!/bin/bash +file=scala_test_results.txt + +testFail=$(grep -ci "[ERROR]" $file) +if [ "$testFail" != "0" ]; then + echo "Some unit tests failed. 
" + exit 1 +fi \ No newline at end of file diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index bd2771042f50..da39297fb032 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -124,12 +124,12 @@ if [ ${TASK} == "scala_test" ]; then if [ ${TRAVIS_OS_NAME} == "osx" ]; then mvn clean package -P osx-x86_64 - mvn integration-test -P osx-x86_64 + mvn integration-test -P osx-x86_64 > scala_test_results.txt fi if [ ${TRAVIS_OS_NAME} == "linux" ]; then # use g++-4.8 for linux mvn clean package -P linux-x86_64 -D cxx=g++-4.8 - mvn integration-test -P linux-x86_64 + mvn integration-test -P linux-x86_64 > scala_test_results.txt fi exit 0 From 587266cf13dd37fb691c8386a0fadf95ce6f37e2 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sat, 19 Dec 2015 23:49:18 -0600 Subject: [PATCH 15/27] corrected path to new sh file --- tests/travis/error_detector.sh | 1 + tests/travis/run_test.sh | 3 +++ 2 files changed, 4 insertions(+) diff --git a/tests/travis/error_detector.sh b/tests/travis/error_detector.sh index 658d2b614c48..c29c8865806b 100755 --- a/tests/travis/error_detector.sh +++ b/tests/travis/error_detector.sh @@ -1,4 +1,5 @@ #!/bin/bash +pwd file=scala_test_results.txt testFail=$(grep -ci "[ERROR]" $file) diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index da39297fb032..5bbc885c24e8 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -132,6 +132,9 @@ if [ ${TASK} == "scala_test" ]; then mvn integration-test -P linux-x86_64 > scala_test_results.txt fi + chmod +x tests/travis/error_detector.sh + ./tests/travis/error_detector.sh + exit 0 fi From f6e767dad63d1b721393e8bd13c9507b213d86af Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 20 Dec 2015 01:01:16 -0700 Subject: [PATCH 16/27] revert to sgd --- python/mxnet/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/model.py b/python/mxnet/model.py index b929150422cc..3200c7b46233 100644 --- a/python/mxnet/model.py +++ b/python/mxnet/model.py @@ -391,7 +391,7 @@ class FeedForward(BASE_ESTIMATOR): The additional keyword arguments passed to optimizer. 
""" def __init__(self, symbol, ctx=None, - num_epoch=None, epoch_size=None, optimizer='ccsgd', + num_epoch=None, epoch_size=None, optimizer='sgd', initializer=Uniform(0.01), numpy_batch_size=128, arg_params=None, aux_params=None, From 6359d2aa58f3e56dba66846930fe52b76a163e2c Mon Sep 17 00:00:00 2001 From: muli Date: Sun, 20 Dec 2015 16:46:17 -0500 Subject: [PATCH 17/27] update ps-lite --- ps-lite | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ps-lite b/ps-lite index 1f2e84a86c63..b1da4b6e0f9e 160000 --- a/ps-lite +++ b/ps-lite @@ -1 +1 @@ -Subproject commit 1f2e84a86c63eb79c4f4c2404851dfa7eb9f4231 +Subproject commit b1da4b6e0f9e387ee30d2d02a063944986ff0cbd From 1d45e2db3d964441f3b785d14c7d7534ca6249f9 Mon Sep 17 00:00:00 2001 From: xgastaldi Date: Mon, 21 Dec 2015 01:19:17 +0100 Subject: [PATCH 18/27] Update test_conv.py --- tests/python/gpu/test_conv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/gpu/test_conv.py b/tests/python/gpu/test_conv.py index 3e2a3102e113..bbb9f386042b 100644 --- a/tests/python/gpu/test_conv.py +++ b/tests/python/gpu/test_conv.py @@ -21,7 +21,7 @@ def get_conv(): fl = mx.symbol.Flatten(data = mp2) fc2 = mx.symbol.FullyConnected(data = fl, num_hidden=10) - softmax = mx.symbol.SoftmaxOutput(data = fc2) + softmax = mx.symbol.SoftmaxOutput(data = fc2, name='softmax') return softmax def get_iter(data_dir): From 617222b1ee46c17f2557777dadea8e493ba6f838 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 12:08:06 -0500 Subject: [PATCH 19/27] print message and content when failing (+2 squashed commits) Squashed commits: [99ccdac] fix path [b32020a] fixed paths of using test scripts --- tests/travis/error_detector.sh | 6 ++++-- tests/travis/run_test.sh | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/travis/error_detector.sh b/tests/travis/error_detector.sh index c29c8865806b..2f67f8502589 100755 --- a/tests/travis/error_detector.sh +++ b/tests/travis/error_detector.sh @@ -1,9 +1,11 @@ #!/bin/bash -pwd file=scala_test_results.txt -testFail=$(grep -ci "[ERROR]" $file) +testFail=$(grep -ci "ERROR" $file) if [ "$testFail" != "0" ]; then + cat $file echo "Some unit tests failed. " exit 1 +else + echo "All unit tests passed! 
" fi \ No newline at end of file diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh index 5bbc885c24e8..8e637285eef0 100755 --- a/tests/travis/run_test.sh +++ b/tests/travis/run_test.sh @@ -124,16 +124,16 @@ if [ ${TASK} == "scala_test" ]; then if [ ${TRAVIS_OS_NAME} == "osx" ]; then mvn clean package -P osx-x86_64 - mvn integration-test -P osx-x86_64 > scala_test_results.txt + mvn integration-test -P osx-x86_64 --log-file scala_test_results.txt fi if [ ${TRAVIS_OS_NAME} == "linux" ]; then # use g++-4.8 for linux mvn clean package -P linux-x86_64 -D cxx=g++-4.8 - mvn integration-test -P linux-x86_64 > scala_test_results.txt + mvn integration-test -P linux-x86_64 --log-file scala_test_results.txt fi - chmod +x tests/travis/error_detector.sh - ./tests/travis/error_detector.sh + chmod +x ../tests/travis/error_detector.sh + ../tests/travis/error_detector.sh || exit -1 exit 0 fi From 4be04b11653f4c777edc6443e776f50250fbc093 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 21:06:54 -0600 Subject: [PATCH 20/27] Added class for learning rate scheduler --- .../scala/ml/dmlc/mxnet/LRScheduler.scala | 42 ++++++++++++++++++- .../main/scala/ml/dmlc/mxnet/NDArray.scala | 2 +- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala index c782897b0f66..dfd5ed34d9da 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -1,5 +1,43 @@ package ml.dmlc.mxnet +import org.slf4j.LoggerFactory -class LRScheduler { - def apply(numUpdate: Int): Float = ??? +/** + * Class for learning rate scheduler, which adaptively changes the learning rate + * based on the training progress. 
+ * @author Yuan Tang + */ + + +abstract class LRScheduler(var baseLR: Float = 0.01) { + /** + * Base class of a learning rate scheduler + */ + + def apply(numUpdate: Int): Unit +} + +class FactorScheduler(var step: Int, var factor: Float) extends LRScheduler { + /** + * Class for reducing learning rate in factor + */ + + var count: Int = 0 + private val logger = LoggerFactory.getLogger(classOf[FactorScheduler]) + + if (step < 1) { + throw new IllegalArgumentException("Schedule step must be greater or equal than 1 round") + } + if (factor >= 1.0) { + throw new IllegalArgumentException("Factor must be less than 1 to make lr reduce") + } + + def apply(numUpdate: Int): Float = { + + if (numUpdate > this.count + this.step) { + this.count += this.step + this.baseLR *= this.factor + this.logger.info(s"""Update$numUpdate: Change learning rate to ${this.baseLR}%.5f""") + } + this.baseLR + } } diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala index a36c87292d64..9b8a55b99c77 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala @@ -7,7 +7,7 @@ import scala.collection.mutable.{ArrayBuffer, ListBuffer} /** * NDArray API of mxnet - * @author Yizhi Liu, Terry Tang + * @author Yizhi Liu, Yuan Tang */ object NDArray { private val logger = LoggerFactory.getLogger(classOf[NDArray]) From 5e5aa59d2f6bdbd659bd3bb6c04f6bf25c87384d Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 21:16:41 -0600 Subject: [PATCH 21/27] Documentation for LRScheduler --- .../scala/ml/dmlc/mxnet/LRScheduler.scala | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala index dfd5ed34d9da..744868db3451 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -2,24 +2,38 @@ package ml.dmlc.mxnet import org.slf4j.LoggerFactory /** - * Class for learning rate scheduler, which adaptively changes the learning rate + * Learning rate scheduler, which adaptively changes the learning rate * based on the training progress. * @author Yuan Tang */ - abstract class LRScheduler(var baseLR: Float = 0.01) { /** * Base class of a learning rate scheduler + * + * The training progress is presented by `num_update`, which can be roughly + * viewed as the number of minibatches executed so far. Its value is + * non-decreasing, and increases at most by one. + * + * The exact value is the upper bound of the number of updates applied to + * a weight/index. + * + * @param numUpdate Int, the maximal number of updates applied to a weight. 
*/ - def apply(numUpdate: Int): Unit } +/** + * Class for reducing learning rate in factor + * + * Assume the weight has been updated by n times, then the learning rate will + * be base_lr * factor^^(floor(n/step)) + * + * @param step Int, schedule learning rate after n updates + * @param factor Float, the factor for reducing the learning rate + * + */ class FactorScheduler(var step: Int, var factor: Float) extends LRScheduler { - /** - * Class for reducing learning rate in factor - */ var count: Int = 0 private val logger = LoggerFactory.getLogger(classOf[FactorScheduler]) From 436062a95c187c2637ead78a6b8e0a0fc283d279 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 21:23:45 -0600 Subject: [PATCH 22/27] Fixed type mismatch --- .../core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala index 744868db3451..b12126e96b10 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -7,7 +7,7 @@ import org.slf4j.LoggerFactory * @author Yuan Tang */ -abstract class LRScheduler(var baseLR: Float = 0.01) { +abstract class LRScheduler(var baseLR: Double = 0.01) { /** * Base class of a learning rate scheduler * @@ -20,7 +20,7 @@ abstract class LRScheduler(var baseLR: Float = 0.01) { * * @param numUpdate Int, the maximal number of updates applied to a weight. */ - def apply(numUpdate: Int): Unit + def apply(numUpdate: Int): Double } /** @@ -45,12 +45,12 @@ class FactorScheduler(var step: Int, var factor: Float) extends LRScheduler { throw new IllegalArgumentException("Factor must be less than 1 to make lr reduce") } - def apply(numUpdate: Int): Float = { + def apply(numUpdate: Int): Double = { if (numUpdate > this.count + this.step) { this.count += this.step this.baseLR *= this.factor - this.logger.info(s"""Update$numUpdate: Change learning rate to ${this.baseLR}%.5f""") + this.logger.info(s"""Update$numUpdate: Change learning rate to ${this.baseLR}""") } this.baseLR } From 770afc2f85adcc7680af8854ca67a1ab1f7d8f31 Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 22:06:06 -0600 Subject: [PATCH 23/27] Change error throw to require --- .../core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala index b12126e96b10..7f56385534dd 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -38,12 +38,8 @@ class FactorScheduler(var step: Int, var factor: Float) extends LRScheduler { var count: Int = 0 private val logger = LoggerFactory.getLogger(classOf[FactorScheduler]) - if (step < 1) { - throw new IllegalArgumentException("Schedule step must be greater or equal than 1 round") - } - if (factor >= 1.0) { - throw new IllegalArgumentException("Factor must be less than 1 to make lr reduce") - } + require(step < 1, "Schedule step must be greater or equal than 1 round") + require(factor >= 1.0, "Factor must be less than 1 to make lr reduce") def apply(numUpdate: Int): Double = { From b170bf207905452b9c2babf0dfcfa98c73400f7e Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Sun, 20 Dec 2015 
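Note: the require calls above reuse the conditions of the original if/throw checks, but require expects the condition that must hold, so as written they are inverted and reject any valid configuration; the sketch below assumes the intended validation, i.e. require(step >= 1, ...) and require(factor < 1.0f, ...). Under that assumption, a FactorScheduler that halves the learning rate every 1000 updates behaves roughly as follows (default baseLR is 0.01f):

  val scheduler = new FactorScheduler(step = 1000, factor = 0.5f)
  scheduler(999)    // ~0.01f   -- numUpdate <= count + step, no change yet
  scheduler(1001)   // ~0.005f  -- first reduction, count advances to 1000
  scheduler(2000)   // ~0.005f  -- still within the second window
  scheduler(2001)   // ~0.0025f -- second reduction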
23:02:16 -0600 Subject: [PATCH 24/27] Change to Float and make member vars protected --- .../src/main/scala/ml/dmlc/mxnet/LRScheduler.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala index 7f56385534dd..756194b94566 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LRScheduler.scala @@ -7,7 +7,7 @@ import org.slf4j.LoggerFactory * @author Yuan Tang */ -abstract class LRScheduler(var baseLR: Double = 0.01) { +abstract class LRScheduler(protected var baseLR: Float = 0.01f) { /** * Base class of a learning rate scheduler * @@ -20,7 +20,7 @@ abstract class LRScheduler(var baseLR: Double = 0.01) { * * @param numUpdate Int, the maximal number of updates applied to a weight. */ - def apply(numUpdate: Int): Double + def apply(numUpdate: Int): Float } /** @@ -33,20 +33,20 @@ abstract class LRScheduler(var baseLR: Double = 0.01) { * @param factor Float, the factor for reducing the learning rate * */ -class FactorScheduler(var step: Int, var factor: Float) extends LRScheduler { +class FactorScheduler(protected var step: Int, protected var factor: Float) extends LRScheduler { - var count: Int = 0 + protected var count: Int = 0 private val logger = LoggerFactory.getLogger(classOf[FactorScheduler]) require(step < 1, "Schedule step must be greater or equal than 1 round") require(factor >= 1.0, "Factor must be less than 1 to make lr reduce") - def apply(numUpdate: Int): Double = { + def apply(numUpdate: Int): Float = { if (numUpdate > this.count + this.step) { this.count += this.step this.baseLR *= this.factor - this.logger.info(s"""Update$numUpdate: Change learning rate to ${this.baseLR}""") + this.logger.info(s"Update$numUpdate: Change learning rate to ${this.baseLR}") } this.baseLR } From b10a6b2bff7ed151d495dc8e54474c403fac7966 Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Mon, 21 Dec 2015 23:23:55 +0800 Subject: [PATCH 25/27] add SGD optimizer, add java friendly inteface for NDArray --- .../main/scala/ml/dmlc/mxnet/NDArray.scala | 23 +++++++++++++++++ .../scala/ml/dmlc/mxnet/optimizer/SGD.scala | 3 +-- .../scala/ml/dmlc/mxnet/NDArraySuite.scala | 25 ++++++++++++------- 3 files changed, 40 insertions(+), 11 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala index 9b8a55b99c77..ca02d6c3d26e 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala @@ -201,6 +201,10 @@ object NDArray { new NDArray(handle = NDArray._newAllocHandle(shape, context, delayAlloc = false)) } + def empty(shape: Int *): NDArray = empty(shape.toArray) + + def empty(ctx: Context, shape: Int *): NDArray = empty(shape.toArray, ctx) + /** * Create a new NDArray filled with 0, with specified shape. * @@ -215,6 +219,10 @@ object NDArray { arr } + def zeros(shape: Int *): NDArray = zeros(shape.toArray) + + def zeros(ctx: Context, shape: Int *): NDArray = zeros(shape.toArray, ctx) + /** * Create a new NDArray filled with 1, with specified shape. * @param shape shape of the NDArray. 
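Note: the vararg overloads introduced here give a Java-friendly way to construct arrays without building an Array[Int] shape first. A small usage sketch; `ctx` in the commented line is assumed to be a Context already in scope:

  val a = NDArray.empty(2, 3)          // same as NDArray.empty(Array(2, 3))
  val b = NDArray.zeros(4)             // length-4 vector of zeros
  // val c = NDArray.zeros(ctx, 2, 3)  // explicit device first, then the shape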
@@ -227,6 +235,21 @@ object NDArray { arr } + def ones(shape: Int *): NDArray = ones(shape.toArray) + + def ones(ctx: Context, shape: Int *): NDArray = ones(shape.toArray, ctx) + + /** + * Clip ndarray elements to range (from, to) + * @param array ndarray to be clipped + * @param min array min elements + * @param max array max elements + * @return a new clipped [[NDArray]] + */ + def clip(array: NDArray, min: Float, max: Float): NDArray = { + NDArray._genericNDArrayFunction("clip", Array(array, min, max))(0) + } + /** * Create a new NDArray that copies content from source_array. * @param sourceArr Source data to create NDArray from. diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala index fa89c873c654..bd434dab8ae2 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/optimizer/SGD.scala @@ -31,8 +31,7 @@ class SGD(val learningRate: Float = 0.01f, val momentum: Float = 0.0f, var resdGrad = grad * rescaleGrad if (clipGradient != 0f) { - resdGrad = NDArray._genericNDArrayFunction( - "clip", Array(resdGrad, -clipGradient, clipGradient))(0) + resdGrad = NDArray.clip(resdGrad, -clipGradient, clipGradient) } if (state != null) { val mom = state.asInstanceOf[NDArray] diff --git a/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala b/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala index 4dfa62e19621..63dd84835bc9 100644 --- a/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala +++ b/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala @@ -5,29 +5,29 @@ import ml.dmlc.mxnet.NDArrayConversions._ class NDArraySuite extends FunSuite with BeforeAndAfterAll { test("to java array") { - val ndarray = NDArray.zeros(Array(2, 2)) + val ndarray = NDArray.zeros(2, 2) assert(ndarray.toArray === Array(0f, 0f, 0f, 0f)) } test("to scalar") { - val ndzeros = NDArray.zeros(Array(1)) + val ndzeros = NDArray.zeros(1) assert(ndzeros.toScalar === 0f) - val ndones = NDArray.ones(Array(1)) + val ndones = NDArray.ones(1) assert(ndones.toScalar === 1f) } test ("call toScalar on an ndarray which is not a scalar") { - intercept[Exception] { NDArray.zeros(Array(1,1)).toScalar } + intercept[Exception] { NDArray.zeros(1, 1).toScalar } } test("size and shape") { - val ndzeros = NDArray.zeros(Array(4, 1)) + val ndzeros = NDArray.zeros(4, 1) assert(ndzeros.shape === Array(4, 1)) assert(ndzeros.size === 4) } test("plus") { - val ndzeros = NDArray.zeros(Array(2, 1)) + val ndzeros = NDArray.zeros(2, 1) val ndones = ndzeros + 1f assert(ndones.toArray === Array(1f, 1f)) assert((ndones + ndzeros).toArray === Array(1f, 1f)) @@ -38,7 +38,7 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { } test("minus") { - val ndones = NDArray.ones(Array(2, 1)) + val ndones = NDArray.ones(2, 1) val ndzeros = ndones - 1f assert(ndzeros.toArray === Array(0f, 0f)) assert((ndones - ndzeros).toArray === Array(1f, 1f)) @@ -50,7 +50,7 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { } test("multiplication") { - val ndones = NDArray.ones(Array(2, 1)) + val ndones = NDArray.ones(2, 1) val ndtwos = ndones * 2 assert(ndtwos.toArray === Array(2f, 2f)) assert((ndones * ndones).toArray === Array(1f, 1f)) @@ -61,7 +61,7 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { } test("division") { - val ndones = NDArray.ones(Array(2, 1)) + val ndones = NDArray.ones(2, 1) val ndzeros = ndones - 
1f val ndhalves = ndones / 2 assert(ndhalves.toArray === Array(0.5f, 0.5f)) @@ -73,4 +73,11 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { assert(ndhalves.toArray === Array(1f, 1f)) } + test("clip") { + val ndarray = NDArray.zeros(3, 2) + ndarray(0, 1).set(1f) + ndarray(1, 2).set(2f) + ndarray(2, 3).set(3f) + assert(NDArray.clip(ndarray, 2f, 3f).toArray === Array(2f, 2f, 2f, 2f, 3f, 3f)) + } } From 8fe6905f89fdb02d4a0087412f385c13dfe40680 Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Tue, 22 Dec 2015 00:09:52 +0800 Subject: [PATCH 26/27] NDArray copy from java array --- .../main/scala/ml/dmlc/mxnet/LibInfo.scala | 5 +++- .../main/scala/ml/dmlc/mxnet/NDArray.scala | 23 +++++++++++-------- .../scala/ml/dmlc/mxnet/NDArraySuite.scala | 20 ++++++++++++---- .../main/native/ml_dmlc_mxnet_native_c_api.cc | 9 ++++++++ 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala index 46dd88c062f6..2592394b8550 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/LibInfo.scala @@ -41,12 +41,15 @@ class LibInfo { ndim: MXUintRef, data: ArrayBuffer[Int]): Int @native def mxNDArraySyncCopyToCPU(handle: NDArrayHandle, - data: Array[Float], + data: Array[MXFloat], size: Int): Int @native def mxNDArraySlice(handle: NDArrayHandle, start: MXUint, end: MXUint, sliceHandle: NDArrayHandle): Int + @native def mxNDArraySyncCopyFromCPU(handle: NDArrayHandle, + source: Array[MXFloat], + size: Int): Int @native def mxKVStoreCreate(name: String, handle: KVStoreHandle): Int @native def mxKVStoreInit(handle: KVStoreHandle, len: MXUint, diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala index ca02d6c3d26e..b0ab1c621fea 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala @@ -215,7 +215,7 @@ object NDArray { */ def zeros(shape: Array[Int], ctx: Context=null): NDArray = { val arr = empty(shape, ctx) - arr(0).set(0f) + arr.set(0f) arr } @@ -231,7 +231,7 @@ object NDArray { */ def ones(shape: Array[Int], ctx: Context=null): NDArray = { val arr = empty(shape, ctx) - arr(0).set(1f) + arr.set(1f) arr } @@ -308,7 +308,10 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { * Peform an synchronize copy from the array. * @param source The data source we should like to copy from. */ - def _syncCopyfrom(source: Array[Float]): Unit = ??? + private def syncCopyfrom(source: Array[Float]): Unit = { + require(source.length == size, "array size do not match the size of NDArray") + checkCall(_LIB.mxNDArraySyncCopyFromCPU(handle, source, source.length)) + } /** * Return a sliced NDArray that shares memory with current one. @@ -325,10 +328,6 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { new NDArray(handle = sliceHandle, writable = this.writable) } - private def _slice(start: Int): NDArray = { - _slice(start, shape(0)) - } - /** * Block until all pending writes operations on current NDArray are finished. * This function will return when all the pending writes to the current @@ -337,7 +336,6 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { */ def waitToRead(): Unit = ??? 
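Note: NDArray.clip is the building block behind the optimizer's clipGradient option (SGD now calls NDArray.clip(resdGrad, -clipGradient, clipGradient) instead of the raw _genericNDArrayFunction). A small sketch of the element-wise behaviour, using the Array[Float] copy path added in this patch:

  val grad = NDArray.empty(3, 2)
  grad.set(Array(-8f, -0.5f, 0f, 0.5f, 8f, 80f))
  val clipped = NDArray.clip(grad, -1f, 1f)
  // clipped.toArray === Array(-1f, -0.5f, 0f, 0.5f, 1f, 1f)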
- def apply(sliceStart: Int): NDArray = _slice(sliceStart) def apply(sliceStart: Int, sliceEnd: Int): NDArray = _slice(sliceStart, sliceEnd) /** @@ -357,10 +355,17 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { this } - def set(other: NDArray) = { + def set(other: NDArray): NDArray = { + require(writable, "trying to assign to a readonly NDArray") other.copyTo(this) } + def set(other: Array[Float]): NDArray = { + require(writable, "trying to assign to a readonly NDArray") + syncCopyfrom(other) + this + } + def +(other: NDArray): NDArray = { NDArray._binaryNDArrayFunction("_plus", this, other) } diff --git a/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala b/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala index 63dd84835bc9..840a381fbd87 100644 --- a/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala +++ b/scala-package/core/src/test/scala/ml/dmlc/mxnet/NDArraySuite.scala @@ -26,6 +26,18 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { assert(ndzeros.size === 4) } + test("set scalar value") { + val ndarray = NDArray.empty(2, 1) + ndarray.set(10f) + assert(ndarray.toArray === Array(10f, 10f)) + } + + test("copy from java array") { + val ndarray = NDArray.empty(4, 1) + ndarray.set(Array(1f, 2f, 3f, 4f)) + assert(ndarray.toArray === Array(1f, 2f, 3f, 4f)) + } + test("plus") { val ndzeros = NDArray.zeros(2, 1) val ndones = ndzeros + 1f @@ -74,10 +86,8 @@ class NDArraySuite extends FunSuite with BeforeAndAfterAll { } test("clip") { - val ndarray = NDArray.zeros(3, 2) - ndarray(0, 1).set(1f) - ndarray(1, 2).set(2f) - ndarray(2, 3).set(3f) - assert(NDArray.clip(ndarray, 2f, 3f).toArray === Array(2f, 2f, 2f, 2f, 3f, 3f)) + val ndarray = NDArray.empty(3, 2) + ndarray.set(Array(1f, 2f, 3f, 4f, 5f, 6f)) + assert(NDArray.clip(ndarray, 2f, 5f).toArray === Array(2f, 2f, 3f, 4f, 5f, 5f)) } } diff --git a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc index 4e6b1461c945..e1bb66d45858 100644 --- a/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/ml_dmlc_mxnet_native_c_api.cc @@ -204,6 +204,15 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_mxnet_LibInfo_mxNDArraySlice(JNIEnv *env, jo return ret; } +JNIEXPORT jint JNICALL Java_ml_dmlc_mxnet_LibInfo_mxNDArraySyncCopyFromCPU + (JNIEnv *env, jobject obj, jobject ndArrayHandle, jfloatArray sourceArr, jint arrSize) { + jlong arrayPtr = getLongField(env, ndArrayHandle); + jfloat *sourcePtr = env->GetFloatArrayElements(sourceArr, NULL); + int ret = MXNDArraySyncCopyFromCPU((NDArrayHandle)arrayPtr, (const mx_float *)sourcePtr, arrSize); + env->ReleaseFloatArrayElements(sourceArr, sourcePtr, 0); + return ret; +} + // The related c api MXKVStoreSetUpdater function takes a c function pointer as its parameter, // while we write java functions here in scala-package. 
// Thus we have to wrap the function in a java object, and run env->CallVoidMethod(obj) once updater is invoked, From c14ed50b50f14e32718a9be661b7d0a418e80363 Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Tue, 22 Dec 2015 00:17:06 +0800 Subject: [PATCH 27/27] remove NDArray.apply because it is really confusing --- .../core/src/main/scala/ml/dmlc/mxnet/NDArray.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala index b0ab1c621fea..36a46064e4e6 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/NDArray.scala @@ -322,12 +322,16 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { * * @return a sliced NDArray that shares memory with current one. */ - private def _slice(start: Int, stop: Int): NDArray = { + def slice(start: Int, stop: Int): NDArray = { val sliceHandle = new NDArrayHandle() checkCall(_LIB.mxNDArraySlice(handle, start, stop, sliceHandle)) new NDArray(handle = sliceHandle, writable = this.writable) } + def slice(start: Int): NDArray = { + slice(start, shape(0)) + } + /** * Block until all pending writes operations on current NDArray are finished. * This function will return when all the pending writes to the current @@ -336,8 +340,6 @@ class NDArray(val handle: NDArrayHandle, val writable: Boolean = true) { */ def waitToRead(): Unit = ??? - def apply(sliceStart: Int, sliceEnd: Int): NDArray = _slice(sliceStart, sliceEnd) - /** * Get context of current NDArray. * @return The context of current NDArray.
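Note: with the apply sugar removed, slicing is now spelled explicitly; slice(start, stop) covers rows [start, stop) along the first dimension and slice(start) runs to shape(0). Since the scaladoc above says the slice shares memory with the current array, writing through a slice should be visible in the parent. A short sketch under those assumptions:

  val arr = NDArray.zeros(3, 2)
  arr.slice(1, 3).set(1f)        // was arr(1, 3).set(1f) before this patch
  // arr.toArray === Array(0f, 0f, 1f, 1f, 1f, 1f)
  val tail = arr.slice(1)        // same as arr.slice(1, arr.shape(0))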