Commit: merge
antinucleon committed Sep 28, 2015
2 parents 138d058 + cd142c4 commit 2e70300
Showing 31 changed files with 809 additions and 235 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -3,6 +3,7 @@ sudo: false
# Enabling test on Linux and OS X
os:
- linux
- osx

# Use Build Matrix to do lint and build separately
env:
15 changes: 7 additions & 8 deletions README.md
@@ -5,19 +5,18 @@
[![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/)
[![Hex.pm](https://img.shields.io/hexpm/l/plug.svg)]()

MXNet is a deep learning framework designed for both *efficiency* and *flexibility*.
It allows you to mix the [flavors](http://mxnet.readthedocs.org/en/latest/program_model.html) of
deep learning programs together to maximize efficiency and productivity.

What's New
----------
* [Note on Programming Models for Deep Learning](http://mxnet.readthedocs.org/en/latest/program_model.html)

Contents
--------
* [Documentation](http://mxnet.readthedocs.org/en/latest/)
* [Code Examples](example)
* [Build Instructions](doc/build.md)
* [Features](#features)
* [License](#license)
Binary file added doc/img/comp_grad_graph.png
Binary file added doc/img/comp_graph.png
Binary file added doc/img/comp_graph_folded.png
1 change: 1 addition & 0 deletions doc/index.md
@@ -15,6 +15,7 @@ User Guide

Developer Guide
---------------
* [Programming Models for Deep Learning](program_model.md)
* [Developer Documents](developer-guide/index.md)
* [Environment Variables for MXNet](env_var.md)
* [Contributor Guideline](contribute.md)
412 changes: 412 additions & 0 deletions doc/program_model.md

Large diffs are not rendered by default.

38 changes: 25 additions & 13 deletions doc/python/ndarray.md
@@ -8,9 +8,12 @@ Create NDArray
Like `numpy`, you can create an `mxnet.ndarray` as follows:
```python
>>> import mxnet as mx
>>> # all-zero array of dimension 100x50
>>> a = mx.nd.zeros((100, 50))
>>> # all-one array of dimension 256x32x128x1
>>> b = mx.nd.ones((256, 32, 128, 1))
>>> # initialize array with contents
>>> c = mx.nd.array([[1, 2, 3], [4, 5, 6]])
```

NDArray operations
@@ -24,9 +27,11 @@ We provide some basic ndarray operations like arithmetic and slice operations. M
```python
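>>> a = mx.nd.ones((100, 50))  # assumed setup; the original creation line is hidden by the diff fold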
>>> a.shape
(100L, 50L)
>>> b = mx.nd.ones((100, 50))
>>> # c and d will be calculated in parallel here!
>>> c = a + b
>>> d = a - b
>>> # inplace operation, b's contents will be modified, but c and d won't be affected.
>>> b += d
```

### Slice operations
@@ -36,8 +41,8 @@
```python
>>> a[0:10] = 1 # first 10 rows will become 1
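>>> # the slice can be read back as a numpy array too (an illustrative addition)
>>> first_rows = a[0:10].asnumpy()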
```

Conversion from/to `numpy.ndarray`
----------------------------------
MXNet NDArray supports a natural way to convert between `mxnet.ndarray` and `numpy.ndarray`:
```python
>>> import mxnet as mx
>>> a = mx.nd.array([1, 2, 3])  # create from a Python list (assumed line; part of this example is hidden by the diff fold)
>>> a.asnumpy()                 # convert to numpy.ndarray
array([ 1., 2., 3.], dtype=float32)
```

Save and Load NDArray
---------------------
You can always use pickle to save and load NDArrays.
We also provide functions to save and load lists or dictionaries of NDArrays to and from file systems.
```python
>>> import mxnet as mx
>>> a = mx.nd.zeros((100, 200))
>>> mx.nd.save("/path/to/array/file", a)
>>> mx.nd.save("s3://path/to/s3/array", a)
>>> mx.nd.save("hdfs://path/to/hdfs/array", a)
>>> b = mx.nd.zeros((100, 200))
>>> # save list of NDArrays
>>> mx.nd.save("/path/to/array/file", [a, b])
>>> # save dictionary of NDArrays to AWS S3
>>> mx.nd.save("s3://path/to/s3/array", {'A' : a, 'B' : b})
>>> # save list of NDArrays to hdfs.
>>> mx.nd.save("hdfs://path/to/hdfs/array", [a, b])
>>> from_file = mx.nd.load("/path/to/array/file")
>>> from_s3 = mx.nd.load("s3://path/to/s3/array")
>>> from_hdfs = mx.nd.load("hdfs://path/to/hdfs/array")
```
@@ -65,8 +77,8 @@ The good thing about using the above `save` and `load` interface is that:
- You could use the format across all `mxnet` language bindings.
- S3 and HDFS are already supported.

Multi-device Support
--------------------
The device information is stored in the `mxnet.Context` structure. When creating an ndarray in mxnet, the user can either use the context argument (the default is the CPU context) to create arrays on a specific device, or use the `with` statement as follows:
```python
>>> import mxnet as mx
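>>> # a minimal sketch of both styles (illustrative lines, not in the original;
>>> # assumes a GPU is available)
>>> gpu_a = mx.nd.ones((2, 3), mx.gpu())   # pass the context explicitly
>>> with mx.Context(mx.gpu()):
...     gpu_b = mx.nd.zeros((2, 3))        # created on GPU via the `with` block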
```
33 changes: 25 additions & 8 deletions doc/python/tutorial.md
@@ -315,17 +315,34 @@ shape inconsistency.

### Bind the Symbols and Run

Now we can bind the free variables of the symbol and perform forward and backward.
The bind function will create an ```Executor``` that can be used to carry out the real computations.

```python
>>> # define the computation graph
>>> A = mx.symbol.Variable('A')
>>> B = mx.symbol.Variable('B')
>>> C = A * B
>>> a = mx.nd.ones(3) * 4
>>> b = mx.nd.ones(3) * 2
>>> # bind the symbol with real arguments
>>> c_exec = C.bind(ctx=mx.cpu(), args={'A' : a, 'B': b})
>>> # do forward pass calculation.
>>> c_exec.forward()
>>> c_exec.outputs[0].asnumpy()
[ 8. 8. 8.]
```
For neural nets, a more commonly used pattern is ```simple_bind```, which will create
all the argument arrays for you. Then you can call ```forward```, and ```backward``` (if the gradient is needed),
to get the gradient.
```python
>>> # define the computation graph; a small net as an illustration
>>> # (the original line here reads "net = some symbol")
>>> data = mx.symbol.Variable('data')
>>> fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=10)
>>> net = mx.symbol.Softmax(data=fc1, name='sm')
>>> texec = net.simple_bind(ctx=mx.cpu(), data=(100, 50))
>>> texec.forward()
>>> texec.backward()
```
The [model API](../../python/mxnet/model.py) is a thin wrapper around the symbolic executors to support neural net training.
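For example, training through the model API might look roughly like the following (a sketch, not from the original tutorial: it assumes the `net` defined above and a data iterator `train`, and the argument style follows `example/imagenet/alexnet.py` in this commit):
```python
>>> model = mx.model.FeedForward(ctx=mx.cpu(), symbol=net,
...                              num_round=10, learning_rate=0.1)
>>> model.fit(X=train)
```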

### How Efficient is the Symbolic API

8 changes: 4 additions & 4 deletions example/README.md
@@ -4,10 +4,10 @@ This folder contains examples of MXNet.

Notebooks
--------
* [composite symbol](notebooks/composite_symbol.ipynb) gives you a demo of how to compose a symbolic Inception-BatchNorm network
* [cifar-10 recipe](notebooks/cifar-recipe.ipynb) gives you a step-by-step demo of how to use MXNet
* [cifar-100](notebooks/cifar-100.ipynb) gives you a demo of how to train a 75.68% accuracy CIFAR-100 model
* [predict with pretrained model](notebooks/predict-with-pretrained-model.ipynb) gives you a demo of using a pretrained Inception-BN network


Contents
6 changes: 3 additions & 3 deletions example/imagenet/alexnet.py
@@ -16,7 +16,7 @@
conv2 = mx.symbol.Convolution(
data=lrn1, kernel=(5, 5), pad=(2, 2), num_filter=256)
relu2 = mx.symbol.Activation(data=conv2, act_type="relu")
pool2 = mx.symbol.Pooling(data=relu2, kernel=(3, 3), stride=(2, 2), pool_type="max")
lrn2 = mx.symbol.LRN(data=pool2, alpha=0.0001, beta=0.75, knorm=1, nsize=5)
# stage 3
conv3 = mx.symbol.Convolution(
@@ -28,7 +28,7 @@
conv5 = mx.symbol.Convolution(
data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256)
relu5 = mx.symbol.Activation(data=conv5, act_type="relu")
pool3 = mx.symbol.Pooling(data=relu5, kernel=(3, 3), stride=(2, 2), pool_type="max")
# stage 4
flatten = mx.symbol.Flatten(data=pool3)
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=4096)
@@ -48,7 +48,7 @@
train, val = ilsvrc12_iterator(batch_size=batch_size, input_shape=(3,224,224))

## train
num_gpus = 4
gpus = [mx.gpu(i) for i in range(num_gpus)]
model = mx.model.FeedForward(
ctx = gpus,
8 changes: 4 additions & 4 deletions example/imagenet/data.py
@@ -7,17 +7,17 @@
def ilsvrc12_iterator(batch_size, input_shape):
"""return train and val iterators for imagenet"""
train_dataiter = mx.io.ImageRecordIter(
path_imgrec = "data/ilsvrc12/train.rec",
mean_img = "data/ilsvrc12/mean.bin",
path_imgrec = "data/train.rec",
mean_img = "data/mean.bin",
rand_crop = True,
rand_mirror = True,
prefetch_buffer = 4,
preprocess_threads = 4,
data_shape = input_shape,
batch_size = batch_size)
val_dataiter = mx.io.ImageRecordIter(
path_imgrec = "data/ilsvrc12/val.rec",
mean_img = "data/ilsvrc12/mean.bin",
path_imgrec = "data/val.rec",
mean_img = "data/mean.bin",
rand_crop = False,
rand_mirror = False,
prefetch_buffer = 4,
15 changes: 3 additions & 12 deletions include/mxnet/storage.h
@@ -39,16 +39,16 @@ class Storage {
* \param ctx Context information about the device and ID.
* \return Handle struct.
*/
virtual Handle Alloc(size_t size, Context ctx) = 0;
/*!
* \brief Free storage.
* \param handle Handle struct.
*/
virtual void Free(Handle handle) = 0;
/*!
* \brief Destructor.
*/
virtual ~Storage() {}
/*!
* \return Storage singleton.
*/
@@ -62,15 +62,6 @@
* \return A shared pointer to Storage singleton.
*/
static std::shared_ptr<Storage> _GetSharedRef();

}; // class Storage
} // namespace mxnet
#endif // MXNET_STORAGE_H_
8 changes: 2 additions & 6 deletions python/mxnet/__init__.py
@@ -1,11 +1,6 @@
#!/usr/bin/env python
# coding: utf-8
"""MXNet: a concise, fast and flexible framework for deep learning
MXNet is a project that evolves from cxxnet, minerva and purine2.
The interface is designed in collaboration by authors of three projects.
"""
"""MXNet: a concise, fast and flexible framework for deep learning. """
from __future__ import absolute_import

from .context import Context, current_context, cpu, gpu
@@ -14,6 +9,7 @@
from . import ndarray
from . import name
from . import symbol
# use mx.kv as short for kvstore
from . import kvstore as kv
from . import io
# use mx.nd as short for mx.ndarray
18 changes: 11 additions & 7 deletions python/mxnet/callback.py
@@ -1,5 +1,7 @@
# coding: utf-8
"""Callback functions that can be used to track various status during iteration."""
from __future__ import absolute_import

import sys
import math
import logging
@@ -19,11 +21,12 @@ def do_checkpoint(prefix):
callback : function
The callback function that can be passed as iter_end_callback to fit.
"""
def _callback(iter_no, sym, arg, aux):
"""The checkpoint function."""
save_checkpoint(prefix, iter_no + 1, sym, arg, aux)
return _callback
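
# A sketch of typical usage (assumed, not from the original file: `model` is an
# mx.model.FeedForward and "my_model" is an illustrative prefix):
#
#   model.fit(X=train_iter, iter_end_callback=do_checkpoint("my_model"))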


class Speedometer(object):
"""Calculate training speed in frequent
@@ -57,12 +60,13 @@ def __call__(self, count):
if self.init:
if count % self.frequent == 0:
speed = self.frequent * self.batch_size / (time.time() - self.tic)
logging.info("Batch [%d]\tSpeed: %.2f samples/sec" % (count, speed))
logging.info("Batch [%d]\tSpeed: %.2f samples/sec", count, speed)
self.tic = time.time()
else:
self.init = True
self.tic = time.time()
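
# A sketch of direct use (assumed: the constructor takes `batch_size` and
# `frequent`, matching the attributes referenced above):
#
#   speed = Speedometer(batch_size=128, frequent=50)
#   for i in range(1, num_batches + 1):
#       ...  # train one batch
#       speed(i)  # logs samples/sec once every `frequent` batches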


class ProgressBar(object):
"""Show a progress bar
@@ -89,7 +93,7 @@ def __call__(self, count):

filled_len = int(round(self.bar_len * count / float(self.total)))
percents = math.ceil(100.0 * count / float(self.total))
prog_bar = '=' * filled_len + '-' * (self.bar_len - filled_len)
sys.stdout.write('[%s] %s%s\r' % (prog_bar, percents, '%'))
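
# A sketch of direct use (assumed: the constructor takes the total count,
# matching `self.total` referenced above):
#
#   bar = ProgressBar(total=100)
#   for i in range(1, 101):
#       bar(i)  # redraws the '='/'-' progress bar in place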


3 changes: 1 addition & 2 deletions python/mxnet/context.py
@@ -1,5 +1,5 @@
# coding: utf-8
""" code for context management """
"""Context management API of mxnet."""
from __future__ import absolute_import

class Context(object):
@@ -19,7 +19,6 @@ class Context(object):
Examples
--------
Switch default context example:
>>> # array on cpu
>>> cpu_array = mx.nd.ones((2, 3))
>>> # switch default context to GPU(2)
