From 92365d432a6b246cf5ce6afc445b7ede311bbe5b Mon Sep 17 00:00:00 2001 From: yajiedesign Date: Sun, 18 Oct 2015 15:37:43 +0800 Subject: [PATCH 1/8] change cmake out put name to libmexnet.dll --- CMakeLists.txt | 2 +- python/mxnet/libinfo.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 607d6ea909c1..d55086fd197a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ target_link_libraries(mxnet ${mshadow_LINKER_LIBS}) target_link_libraries(mxnet dmlccore) target_link_libraries(mxnet pslite) target_link_libraries(mxnet ${pslite_LINKER_LIBS}) - +set_target_properties(mxnet PROPERTIES OUTPUT_NAME "libmxnet") # ---[ Linter target if(MSVC) diff --git a/python/mxnet/libinfo.py b/python/mxnet/libinfo.py index ac883a076543..d026aa660dce 100644 --- a/python/mxnet/libinfo.py +++ b/python/mxnet/libinfo.py @@ -24,7 +24,7 @@ def find_lib_path(): dll_path.append(os.path.join(curr_path, '../../build', vs_configuration)) dll_path.append(os.path.join(curr_path, '../../windows', vs_configuration)) if os.name == 'nt': - dll_path = [os.path.join(p, 'mxnet.dll') for p in dll_path] + dll_path = [os.path.join(p, 'libmxnet.dll') for p in dll_path] else: dll_path = [os.path.join(p, 'libmxnet.so') for p in dll_path] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] From 28967793f6901ef71ed67ff1582b363282f5eca4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 18 Oct 2015 11:25:12 -0700 Subject: [PATCH 2/8] [R] Change License to BSD --- R-package/DESCRIPTION | 4 ++-- R-package/LICENSE | 28 ++++++++++++++++++++++++++++ R-package/README.md | 8 +++++--- 3 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 R-package/LICENSE diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6bdc16534067..27a81e75861e 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -6,8 +6,8 @@ Date: 2015-10-02 Author: Tianqi Chen, Qiang Kou, Tong He Maintainer: Qiang Kou Description: MXNet is a deep learning framework designed for both efficiency and flexibility. It allows you to mix the flavours of deep learning programs together to maximize the efficiency and your productivity. -License: Apache-2.0 -URL: https://github.com/dmlc/mxnet +License: BSD +URL: https://github.com/dmlc/mxnet/R-package BugReports: https://github.com/dmlc/mxnet/issues Imports: methods, Rcpp (>= 0.11.1) Suggests: testthat diff --git a/R-package/LICENSE b/R-package/LICENSE new file mode 100644 index 000000000000..de64a67e5a5f --- /dev/null +++ b/R-package/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2015 by Contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of rabit nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/R-package/README.md b/R-package/README.md index 309a7874340f..fdc581014b19 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,5 +1,5 @@ -MXNet R-Package -=============== +MXNetR: Deep learning for R +=========================== You have find MXNet R Package! The MXNet R packages brings flexible and efficient GPU computing and state-of-art deep learning to R. @@ -15,9 +15,11 @@ Resources * [MXNet R Package Document](http://mxnet.readthedocs.org/en/latest/R-package/index.html) - Check this out for detailed documents, examples, installation guides. - Installation ------------ Follow [Installation Guide](http://mxnet.readthedocs.org/en/latest/build.html) +License +------- +MXNet R-package is licensed under [BSD](https://github.com/dmlc/mxnet/blob/master/R-Package/LICENSE) license. From 28deec07a9c2ec91335c001c560f809c3bf09a2e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 18 Oct 2015 12:14:09 -0700 Subject: [PATCH 3/8] Update README.md --- R-package/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index fdc581014b19..1a25eb7498c4 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,5 +1,7 @@ -MXNetR: Deep learning for R -=========================== + R: Deep Learning for R +========================== +[![Build Status](https://travis-ci.org/dmlc/mxnet.svg?branch=master)](https://travis-ci.org/dmlc/mxnet) +[![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/R-package/index.html) You have find MXNet R Package! The MXNet R packages brings flexible and efficient GPU computing and state-of-art deep learning to R. From 6a71e36697bc23af1cd82aaec2c8091a82366fb0 Mon Sep 17 00:00:00 2001 From: muli Date: Sun, 18 Oct 2015 15:50:02 -0400 Subject: [PATCH 4/8] update r logo --- R-package/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index 1a25eb7498c4..af33de0ed97b 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,4 +1,4 @@ - R: Deep Learning for R + Deep Learning for R ========================== [![Build Status](https://travis-ci.org/dmlc/mxnet.svg?branch=master)](https://travis-ci.org/dmlc/mxnet) [![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/R-package/index.html) @@ -24,4 +24,3 @@ Follow [Installation Guide](http://mxnet.readthedocs.org/en/latest/build.html) License ------- MXNet R-package is licensed under [BSD](https://github.com/dmlc/mxnet/blob/master/R-Package/LICENSE) license. 
- From 685c6f1193ac96ab4a3d35b441b020d7507ed755 Mon Sep 17 00:00:00 2001 From: muli Date: Sun, 18 Oct 2015 15:50:35 -0400 Subject: [PATCH 5/8] update --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index af33de0ed97b..3c46288fb8c8 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -1,4 +1,4 @@ - Deep Learning for R + Deep Learning for R ========================== [![Build Status](https://travis-ci.org/dmlc/mxnet.svg?branch=master)](https://travis-ci.org/dmlc/mxnet) [![Documentation Status](https://readthedocs.org/projects/mxnet/badge/?version=latest)](http://mxnet.readthedocs.org/en/latest/R-package/index.html) From 9f77880f5551ea836e7bf49c47c6fa110b11f228 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Sun, 18 Oct 2015 16:09:27 -0600 Subject: [PATCH 6/8] [R, DOC] update rmarkdown doc --- R-package/vignettes/mnistCompetition.Rmd | 59 +++-- .../vignettes/ndarrayAndSymbolTutorial.Rmd | 117 +++------- doc/R-package/Makefile | 8 +- doc/R-package/mnistCompetition.md | 159 ++++++++++++- doc/R-package/ndarrayAndSymbolTutorial.md | 217 ++++-------------- 5 files changed, 278 insertions(+), 282 deletions(-) diff --git a/R-package/vignettes/mnistCompetition.Rmd b/R-package/vignettes/mnistCompetition.Rmd index 20fdd83ddd57..ebfbc505907a 100644 --- a/R-package/vignettes/mnistCompetition.Rmd +++ b/R-package/vignettes/mnistCompetition.Rmd @@ -1,7 +1,11 @@ Handwritten Digits Classification Competition -====================================================== +============================================= -[MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. We will present the basic usage of `mxnet` to compete in this challenge. +[MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. +We will present the basic usage of [mxnet](https://github.com/dmlc/mxnet/tree/master/R-package) to compete in this challenge. + +This tutorial is written in Rmarkdown. You can download the source [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/mnistCompetition.Rmd) and view a +hosted version of tutorial [here](http://mxnet.readthedocs.org/en/latest/R-package/mnistCompetition.html). ## Data Loading @@ -9,7 +13,7 @@ First, let us download the data from [here](https://www.kaggle.com/c/digit-recog Then we can read them in R and convert to matrices. -```{r, eval=FALSE} +```{r} require(mxnet) train <- read.csv('data/train.csv', header=TRUE) test <- read.csv('data/test.csv', header=TRUE) @@ -22,14 +26,14 @@ train.y <- train[,1] Here every image is represented as a single row in train/test. 
The greyscale of each image falls in the range [0, 255], we can linearly transform it into [0,1] by -```{r, eval=FALSE} +```{r} train.x <- train.x/255 test <- test/255 ``` In the label part, we see the number of each digit is fairly even: -```{r, eval=FALSE} +```{r} table(train.y) ``` @@ -37,7 +41,7 @@ table(train.y) Now we have the data. The next step is to configure the structure of our network. -```{r, eval=FALSE} +```{r} data <- mx.symbol.Variable("data") fc1 <- mx.symbol.FullyConnected(data, name="fc1", num_hidden=128) act1 <- mx.symbol.Activation(fc1, name="relu1", act_type="relu") @@ -59,15 +63,13 @@ softmax <- mx.symbol.Softmax(fc3, name="sm") We are almost ready for the training process. Before we start the computation, let's decide what device should we use. -```{r, eval=FALSE} -devices <- lapply(1:2, function(i) { - mx.cpu(i) -}) +```{r} +devices <- mx.cpu() ``` -Here we assign two threads of our CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. +Here we assign CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. -```{r, eval=FALSE} +```{r} mx.set.seed(0) model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y, ctx=devices, num.round=10, array.batch.size=100, @@ -80,21 +82,21 @@ model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y, To make prediction, we can simply write -```{r, eval=FALSE} +```{r} preds <- predict(model, test) dim(preds) ``` It is a matrix with 28000 rows and 10 cols, containing the desired classification probabilities from the output layer. To extract the maximum label for each row, we can use the `max.col` in R: -```{r, eval=FALSE} +```{r} pred.label <- max.col(preds) - 1 table(pred.label) ``` With a little extra effort in the csv format, we can have our submission to the competition! -```{r, eval=FALSE} +```{r} submission <- data.frame(ImageId=1:nrow(test), Label=pred.label) write.csv(submission, file='submission.csv', row.names=FALSE, quote=FALSE) ``` @@ -105,7 +107,7 @@ Next we are going to introduce a new network structure: [LeNet](http://yann.lecu First we construct the network: -```{r, eval=FALSE} +```{r} # input data <- mx.symbol.Variable('data') # first conv @@ -130,7 +132,7 @@ lenet <- mx.symbol.Softmax(data=fc2) Then let us reshape the matrices into arrays: -```{r, eval=FALSE} +```{r} train.array <- t(train.x) dim(train.array) <- c(1,28,28,nrow(train.x)) train.array <- aperm(train.array, c(4,1,2,3)) @@ -141,38 +143,47 @@ test.array <- aperm(test.array, c(4,1,2,3)) Next we are going to compare the training speed on different devices, so the definition of the devices goes first: -```{r, eval=FALSE} +```{r} +n.gpu <- 1 device.cpu <- mx.cpu() -device.gpu <- lapply(1:4, function(i) { +device.gpu <- lapply(0:(n.gpu-1), function(i) { mx.gpu(i) }) ``` -Training on CPU: +As you can see, we can pass a list of devices, to ask mxnet to train on multiple GPUs (you can do similar thing for cpu, +but since internal computation of cpu is already multi-threaded, there is less gain than using GPUs). + +We start by training on CPU first. Because it takes a bit time to do so, we will only run it for one iteration. 
-```{r, eval=FALSE} +```{r} mx.set.seed(0) +tic <- proc.time() model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, - ctx=device.cpu, num.round=5, array.batch.size=100, + ctx=device.cpu, num.round=1, array.batch.size=100, learning.rate=0.05, momentum=0.9, wd=0.00001, eval.metric=mx.metric.accuracy, epoch.end.callback=mx.callback.log.train.metric(100)) +print(proc.time() - tic) ``` Training on GPU: -```{r, eval=FALSE} +```{r} mx.set.seed(0) +tic <- proc.time() model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, ctx=device.gpu, num.round=5, array.batch.size=100, learning.rate=0.05, momentum=0.9, wd=0.00001, eval.metric=mx.metric.accuracy, epoch.end.callback=mx.callback.log.train.metric(100)) +print(proc.time() - tic) ``` +As you can see by using GPU, we can get a much faster speedup in training! Finally we can submit the result to Kaggle again to see the improvement of our ranking! -```{r, eval=FALSE} +```{r} preds <- predict(model, test.array) pred.label <- max.col(preds) - 1 submission <- data.frame(ImageId=1:nrow(test), Label=pred.label) diff --git a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd index 69f163ba6ad2..0f69d5449344 100644 --- a/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd +++ b/R-package/vignettes/ndarrayAndSymbolTutorial.Rmd @@ -1,22 +1,20 @@ MXNet R Tutorial on NDArray and Symbol -============================ +====================================== -This vignette gives a general overview of MXNet's R package. MXNet contains a +This vignette gives a general overview of MXNet"s R package. MXNet contains a mixed flavor of elements to bake flexible and efficient -applications. There are mainly three concepts: +applications. There are two major concepts introduced in this tutorial. * [NDArray](#ndarray-numpy-style-tensor-computations-on-cpus-and-gpus) offers matrix and tensor computations on both CPU and GPU, with automatic parallelization * [Symbol](#symbol-and-automatic-differentiation) makes defining a neural network extremely easy, and provides automatic differentiation. -* [KVStore](#distributed-key-value-store) easy the data synchronization between - multi-GPUs and multi-machines. ## NDArray: Vectorized tensor computations on CPUs and GPUs `NDArray` is the basic vectorized operation unit in MXNet for matrix and tensor computations. -Users can perform usual calculations as on R's array, but with two additional features: +Users can perform usual calculations as on R"s array, but with two additional features: 1. **multiple devices**: all operations can be run on various devices including CPU and GPU @@ -25,16 +23,17 @@ CPU and GPU ### Create and Initialization -Let's create `NDArray` on either GPU or CPU +Let"s create `NDArray` on either GPU or CPU ```{r} require(mxnet) a <- mx.nd.zeros(c(2, 3)) # create a 2-by-3 matrix on cpu -b <- mx.nd.zeros(c(2, 3), mx.cpu()) # create a 2-by-3 matrix on gpu 0 -c <- mx.nd.zeros(c(2, 3), mx.gpu(1)) # create a 2-by-3 matrix on gpu 0 +b <- mx.nd.zeros(c(2, 3), mx.cpu()) # create a 2-by-3 matrix on cpu +# c <- mx.nd.zeros(c(2, 3), mx.gpu(0)) # create a 2-by-3 matrix on gpu 0, if you have CUA enabled. ``` -We can also initialize an `NDArray` object in various ways: +As a side note, normally for CUDA enabled devices, the device id of GPU starts from 0. +So that is why we passed in 0 to GPU id. 
We can also initialize an `NDArray` object in various ways: ```{r} a <- mx.nd.ones(c(4, 4)) @@ -58,7 +57,7 @@ b You can perform elemental-wise operations on `NDArray` objects: ```{r} -a <- mx.nd.ones(c(2, 3)) * 2 +a <- mx.nd.ones(c(2, 4)) * 2 b <- mx.nd.ones(c(2, 4)) / 8 as.array(a) as.array(b) @@ -80,17 +79,17 @@ as.array(c) #### Load and Save -You can save an `NDArray` object to your disk with `mx.nd.save`: +You can save a list of `NDArray` object to your disk with `mx.nd.save`: ```{r} a <- mx.nd.ones(c(2, 3)) -mx.nd.save(a, 'temp.ndarray') +mx.nd.save(list(a), "temp.ndarray") ``` You can also load it back easily: ```{r} -a <- mx.nd.load('temp.ndarray') +a <- mx.nd.load("temp.ndarray") as.array(a[[1]]) ``` @@ -98,8 +97,8 @@ In case you want to save data to the distributed file system such as S3 and HDFS we can directly save to and load from them. For example: ```{r,eval=FALSE} -mx.nd.save(a, 's3://mybucket/mydata.bin') -mx.nd.save(a, 'hdfs///users/myname/mydata.bin') +mx.nd.save(list(a), "s3://mybucket/mydata.bin") +mx.nd.save(list(a), "hdfs///users/myname/mydata.bin") ``` ### Automatic Parallelization @@ -139,7 +138,7 @@ the read and write dependency and find a best way to execute them in parallel. The actual computations are finished if we want to copy the results into some -other place, such as `as.array(a)` or `mx.nd.save(a, 'temp.dat')`. Therefore, if we +other place, such as `as.array(a)` or `mx.nd.save(a, "temp.dat")`. Therefore, if we want to write highly parallelized codes, we only need to postpone when we need the results. @@ -153,11 +152,11 @@ The following codes create a two layer perceptrons network: ```{r} require(mxnet) -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) -net <- mx.symbol.Activation(data=net, name='relu1', act_type="relu") -net <- mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) -net <- mx.symbol.Softmax(data=net, name='out') +net <- mx.symbol.Variable("data") +net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) +net <- mx.symbol.Activation(data=net, name="relu1", act_type="relu") +net <- mx.symbol.FullyConnected(data=net, name="fc2", num_hidden=64) +net <- mx.symbol.Softmax(data=net, name="out") class(net) ``` @@ -183,24 +182,23 @@ As can be seen, these arguments are the parameters need by each symbol: We can also specify the automatic generated names explicitly: ```{r} -net <- mx.symbol.Variable('data') -w <- mx.symbol.Variable('myweight') -net <- sym.FullyConnected(data=data, weight=w, name='fc1', num_hidden=128) +data <- mx.symbol.Variable("data") +w <- mx.symbol.Variable("myweight") +net <- mx.symbol.FullyConnected(data=data, weight=w, name="fc1", num_hidden=128) arguments(net) ``` ### More Complicated Composition -MXNet provides well-optimized symbols (see -[src/operator](https://github.com/dmlc/mxnet/tree/master/src/operator)) for +MXNet provides well-optimized symbols for commonly used layers in deep learning. We can also easily define new operators in python. The following example first performs an elementwise add between two symbols, then feed them to the fully connected operator. 
```{r} -lhs <- mx.symbol.Variable('data1') -rhs <- mx.symbol.Variable('data2') -net <- mx.symbol.FullyConnected(data=lhs + rhs, name='fc1', num_hidden=128) +lhs <- mx.symbol.Variable("data1") +rhs <- mx.symbol.Variable("data2") +net <- mx.symbol.FullyConnected(data=lhs + rhs, name="fc1", num_hidden=128) arguments(net) ``` @@ -208,53 +206,23 @@ We can also construct symbol in a more flexible way rather than the single forward composition we addressed before. ```{r} -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) -net2 <- mx.symbol.Variable('data2') -net2 <- mx.symbol.FullyConnected(data=net2, name='net2', num_hidden=128) -composed_net <- net(data=net2, name='compose') -arguments(composed_net) +net <- mx.symbol.Variable("data") +net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) +net2 <- mx.symbol.Variable("data2") +net2 <- mx.symbol.FullyConnected(data=net2, name="net2", num_hidden=128) +composed.net <- mx.apply(net, data=net2, name="compose") +arguments(composed.net) ``` In the above example, *net* is used a function to apply to an existing symbol -*net*, the resulting *composed_net* will replace the original argument *data* by +*net*, the resulting *composed.net* will replace the original argument *data* by *net2* instead. -### Argument Shapes Inference - -Now we have known how to define the symbol. Next we can inference the shapes of -all the arguments it needed by given the input data shape. - -```{r} -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=10) -``` - -The shape inference can be used as an earlier debugging mechanism to detect -shape inconsistency. - -### Bind the Symbols and Run - -Now we can bind the free variables of the symbol and perform forward and backward. -The bind function will create an ```Executor``` that can be used to carry out the real computations. - -For neural nets, a more commonly used pattern is ```simple_bind```, which will create -all the arguments arrays for you. Then you can call forward, and backward(if gradient is needed) -to get the gradient. - -```{r, eval=FALSE} -A <- mx.symbol.Variable('A') -B <- mx.symbol.Variable('B') -C <- A * B - -texec <- mx.simple.bind(C) -texec.forward() -texec.backward() -``` +### Training a Neural Net. The [model API](../../R-package/R/model.R) is a thin wrapper around the symbolic executors to support neural net training. -You are also highly encouraged to read [Symbolic Configuration and Execution in Pictures](symbol_in_pictures.md), +You are also highly encouraged to read [Symbolic Configuration and Execution in Pictures for python package](../python/symbol_in_pictures.md), which provides a detailed explanation of concepts in pictures. ### How Efficient is Symbolic API @@ -272,14 +240,3 @@ extremely efficient. We also provide fine grained operators for more flexible composition. Because we are also doing more inplace memory allocation, mxnet can be ***more memory efficient*** than cxxnet, and gets to same runtime, with greater flexiblity. - - - - - - - - - - - diff --git a/doc/R-package/Makefile b/doc/R-package/Makefile index 5dcd78adbdb3..ac7ac957fc05 100644 --- a/doc/R-package/Makefile +++ b/doc/R-package/Makefile @@ -1,11 +1,11 @@ # This is the makefile for compiling Rmarkdown files into the md file with results. 
PKGROOT=../../R-package -# ADD The Markdown to be built here +# ADD The Markdown to be built here, with suffix md classifyRealImageWithPretrainedModel.md: -mnistCompetition.Rmd: -ndarrayAndSymbolTutorial.Rmd: -fiveMinutesNeuralNetwork.Rmd: +mnistCompetition.md: +ndarrayAndSymbolTutorial.md: +fiveMinutesNeuralNetwork.md: # General Rules for build rmarkdowns, need knitr %.md: $(PKGROOT)/vignettes/%.Rmd diff --git a/doc/R-package/mnistCompetition.md b/doc/R-package/mnistCompetition.md index 189f016dd4a4..0e73f7700486 100644 --- a/doc/R-package/mnistCompetition.md +++ b/doc/R-package/mnistCompetition.md @@ -1,7 +1,11 @@ Handwritten Digits Classification Competition ====================================================== -[MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. We will present the basic usage of `mxnet` to compete in this challenge. +[MNIST](http://yann.lecun.com/exdb/mnist/) is a handwritten digits image data set created by Yann LeCun. Every digit is represented by a 28x28 image. It has become a standard data set to test classifiers on simple image input. Neural network is no doubt a strong model for image classification tasks. There's a [long-term hosted competition](https://www.kaggle.com/c/digit-recognizer) on Kaggle using this data set. We will present the basic usage of [mxnet](https://github.com/dmlc/mxnet/tree/master/R-package) to compete in this challenge. + +This tutorial is written in Rmarkdown. You can download the source [here](https://github.com/dmlc/mxnet/blob/master/R-package/vignettes/mnistCompetition.Rmd) and view a +hosted version of tutorial [here](http://mxnet.readthedocs.org/en/latest/R-package/mnistCompetition.html). + ## Data Loading @@ -12,6 +16,14 @@ Then we can read them in R and convert to matrices. ```r require(mxnet) +``` + +``` +## Loading required package: mxnet +## Loading required package: methods +``` + +```r train <- read.csv('data/train.csv', header=TRUE) test <- read.csv('data/test.csv', header=TRUE) train <- data.matrix(train) @@ -36,6 +48,12 @@ In the label part, we see the number of each digit is fairly even: table(train.y) ``` +``` +## train.y +## 0 1 2 3 4 5 6 7 8 9 +## 4132 4684 4177 4351 4072 3795 4137 4401 4063 4188 +``` + ## Network Configuration Now we have the data. The next step is to configure the structure of our network. @@ -65,12 +83,10 @@ We are almost ready for the training process. Before we start the computation, l ```r -devices <- lapply(1:2, function(i) { - mx.cpu(i) -}) +devices <- mx.cpu() ``` -Here we assign two threads of our CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. +Here we assign CPU to `mxnet`. After all these preparation, you can run the following command to train the neural network! Note that `mx.set.seed` is the correct function to control the random process in `mxnet`. 
```r @@ -82,6 +98,60 @@ model <- mx.model.FeedForward.create(softmax, X=train.x, y=train.y, epoch.end.callback=mx.callback.log.train.metric(100)) ``` +``` +## Start training with 1 devices +## Batch [100] Train-accuracy=0.6563 +## Batch [200] Train-accuracy=0.777999999999999 +## Batch [300] Train-accuracy=0.827466666666665 +## Batch [400] Train-accuracy=0.855499999999999 +## [1] Train-accuracy=0.859832935560859 +## Batch [100] Train-accuracy=0.9529 +## Batch [200] Train-accuracy=0.953049999999999 +## Batch [300] Train-accuracy=0.955866666666666 +## Batch [400] Train-accuracy=0.957525000000001 +## [2] Train-accuracy=0.958309523809525 +## Batch [100] Train-accuracy=0.968 +## Batch [200] Train-accuracy=0.9677 +## Batch [300] Train-accuracy=0.9696 +## Batch [400] Train-accuracy=0.970650000000002 +## [3] Train-accuracy=0.970809523809526 +## Batch [100] Train-accuracy=0.973 +## Batch [200] Train-accuracy=0.974249999999999 +## Batch [300] Train-accuracy=0.976 +## Batch [400] Train-accuracy=0.977100000000003 +## [4] Train-accuracy=0.977452380952384 +## Batch [100] Train-accuracy=0.9834 +## Batch [200] Train-accuracy=0.981949999999999 +## Batch [300] Train-accuracy=0.981900000000001 +## Batch [400] Train-accuracy=0.982600000000003 +## [5] Train-accuracy=0.983000000000003 +## Batch [100] Train-accuracy=0.983399999999999 +## Batch [200] Train-accuracy=0.98405 +## Batch [300] Train-accuracy=0.985000000000001 +## Batch [400] Train-accuracy=0.985725000000003 +## [6] Train-accuracy=0.985952380952384 +## Batch [100] Train-accuracy=0.988999999999999 +## Batch [200] Train-accuracy=0.9876 +## Batch [300] Train-accuracy=0.988100000000001 +## Batch [400] Train-accuracy=0.988750000000003 +## [7] Train-accuracy=0.988880952380955 +## Batch [100] Train-accuracy=0.991999999999999 +## Batch [200] Train-accuracy=0.9912 +## Batch [300] Train-accuracy=0.990066666666668 +## Batch [400] Train-accuracy=0.990275000000003 +## [8] Train-accuracy=0.990452380952384 +## Batch [100] Train-accuracy=0.9937 +## Batch [200] Train-accuracy=0.99235 +## Batch [300] Train-accuracy=0.991966666666668 +## Batch [400] Train-accuracy=0.991425000000003 +## [9] Train-accuracy=0.991500000000003 +## Batch [100] Train-accuracy=0.9942 +## Batch [200] Train-accuracy=0.99245 +## Batch [300] Train-accuracy=0.992433333333334 +## Batch [400] Train-accuracy=0.992275000000002 +## [10] Train-accuracy=0.992380952380955 +``` + ## Prediction and Submission To make prediction, we can simply write @@ -92,6 +162,10 @@ preds <- predict(model, test) dim(preds) ``` +``` +## [1] 28000 10 +``` + It is a matrix with 28000 rows and 10 cols, containing the desired classification probabilities from the output layer. To extract the maximum label for each row, we can use the `max.col` in R: @@ -100,6 +174,12 @@ pred.label <- max.col(preds) - 1 table(pred.label) ``` +``` +## pred.label +## 0 1 2 3 4 5 6 7 8 9 +## 2818 3195 2744 2767 2683 2596 2798 2790 2784 2825 +``` + With a little extra effort in the csv format, we can have our submission to the competition! @@ -154,29 +234,53 @@ Next we are going to compare the training speed on different devices, so the def ```r +n.gpu <- 1 device.cpu <- mx.cpu() -device.gpu <- lapply(1:4, function(i) { +device.gpu <- lapply(0:(n.gpu-1), function(i) { mx.gpu(i) }) ``` -Training on CPU: +As you can see, we can pass a list of devices, to ask mxnet to train on multiple GPUs (you can do similar thing for cpu, +but since internal computation of cpu is already multi-threaded, there is less gain than using GPUs). 
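As a sketch of what that would look like with more than one GPU (an assumption; the definition above uses `n.gpu <- 1`, and only functions already used in this tutorial appear here):

```r
# Illustrative only: assumes a machine with two CUDA-enabled GPUs.
n.gpu <- 2
device.gpu <- lapply(0:(n.gpu-1), function(i) {
  mx.gpu(i)
})
# Passing the whole list as ctx makes mxnet train with data parallelism
# across the listed devices; the rest of the call stays unchanged.
model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y,
                                     ctx=device.gpu, num.round=5, array.batch.size=100,
                                     learning.rate=0.05, momentum=0.9, wd=0.00001,
                                     eval.metric=mx.metric.accuracy,
                                     epoch.end.callback=mx.callback.log.train.metric(100))
```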
+ +We start by training on CPU first. Because it takes a bit time to do so, we will only run it for one iteration. ```r mx.set.seed(0) +tic <- proc.time() model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, - ctx=device.cpu, num.round=5, array.batch.size=100, + ctx=device.cpu, num.round=1, array.batch.size=100, learning.rate=0.05, momentum=0.9, wd=0.00001, eval.metric=mx.metric.accuracy, epoch.end.callback=mx.callback.log.train.metric(100)) ``` +``` +## Start training with 1 devices +## Batch [100] Train-accuracy=0.1054 +## Batch [200] Train-accuracy=0.1237 +## Batch [300] Train-accuracy=0.352766666666667 +## Batch [400] Train-accuracy=0.498824999999999 +## [1] Train-accuracy=0.519546539379474 +``` + +```r +print(proc.time() - tic) +``` + +``` +## user system elapsed +## 132.340 203.621 84.825 +``` + Training on GPU: ```r mx.set.seed(0) +tic <- proc.time() model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, ctx=device.gpu, num.round=5, array.batch.size=100, learning.rate=0.05, momentum=0.9, wd=0.00001, @@ -184,6 +288,45 @@ model <- mx.model.FeedForward.create(lenet, X=train.array, y=train.y, epoch.end.callback=mx.callback.log.train.metric(100)) ``` +``` +## Start training with 1 devices +## Batch [100] Train-accuracy=0.1055 +## Batch [200] Train-accuracy=0.1197 +## Batch [300] Train-accuracy=0.346266666666667 +## Batch [400] Train-accuracy=0.4925 +## [1] Train-accuracy=0.513699284009546 +## Batch [100] Train-accuracy=0.9577 +## Batch [200] Train-accuracy=0.961849999999999 +## Batch [300] Train-accuracy=0.966 +## Batch [400] Train-accuracy=0.968750000000003 +## [2] Train-accuracy=0.969404761904765 +## Batch [100] Train-accuracy=0.977399999999999 +## Batch [200] Train-accuracy=0.97815 +## Batch [300] Train-accuracy=0.980033333333335 +## Batch [400] Train-accuracy=0.981400000000003 +## [3] Train-accuracy=0.981761904761908 +## Batch [100] Train-accuracy=0.985799999999999 +## Batch [200] Train-accuracy=0.98575 +## Batch [300] Train-accuracy=0.986666666666668 +## Batch [400] Train-accuracy=0.987550000000003 +## [4] Train-accuracy=0.987880952380955 +## Batch [100] Train-accuracy=0.9918 +## Batch [200] Train-accuracy=0.9908 +## Batch [300] Train-accuracy=0.991566666666668 +## Batch [400] Train-accuracy=0.992175000000002 +## [5] Train-accuracy=0.992380952380955 +``` + +```r +print(proc.time() - tic) +``` + +``` +## user system elapsed +## 10.176 1.608 7.743 +``` + +As you can see by using GPU, we can get a much faster speedup in training! Finally we can submit the result to Kaggle again to see the improvement of our ranking! diff --git a/doc/R-package/ndarrayAndSymbolTutorial.md b/doc/R-package/ndarrayAndSymbolTutorial.md index b5572c5e3d9d..94cc8c2f5d69 100644 --- a/doc/R-package/ndarrayAndSymbolTutorial.md +++ b/doc/R-package/ndarrayAndSymbolTutorial.md @@ -1,22 +1,20 @@ MXNet R Tutorial on NDArray and Symbol -============================ +====================================== -This vignette gives a general overview of MXNet's R package. MXNet contains a +This vignette gives a general overview of MXNet"s R package. MXNet contains a mixed flavor of elements to bake flexible and efficient -applications. There are mainly three concepts: +applications. There are two major concepts introduced in this tutorial. 
* [NDArray](#ndarray-numpy-style-tensor-computations-on-cpus-and-gpus) offers matrix and tensor computations on both CPU and GPU, with automatic parallelization * [Symbol](#symbol-and-automatic-differentiation) makes defining a neural network extremely easy, and provides automatic differentiation. -* [KVStore](#distributed-key-value-store) easy the data synchronization between - multi-GPUs and multi-machines. ## NDArray: Vectorized tensor computations on CPUs and GPUs `NDArray` is the basic vectorized operation unit in MXNet for matrix and tensor computations. -Users can perform usual calculations as on R's array, but with two additional features: +Users can perform usual calculations as on R"s array, but with two additional features: 1. **multiple devices**: all operations can be run on various devices including CPU and GPU @@ -25,7 +23,7 @@ CPU and GPU ### Create and Initialization -Let's create `NDArray` on either GPU or CPU +Let"s create `NDArray` on either GPU or CPU ```r @@ -39,30 +37,12 @@ require(mxnet) ```r a <- mx.nd.zeros(c(2, 3)) # create a 2-by-3 matrix on cpu -b <- mx.nd.zeros(c(2, 3), mx.gpu()) # create a 2-by-3 matrix on gpu 0 +b <- mx.nd.zeros(c(2, 3), mx.cpu()) # create a 2-by-3 matrix on cpu +# c <- mx.nd.zeros(c(2, 3), mx.gpu(0)) # create a 2-by-3 matrix on gpu 0, if you have CUA enabled. ``` -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/storage/storage.cc:43: Please compile with CUDA enabled -``` - -```r -c <- mx.nd.zeros(c(2, 3), mx.gpu(2)) # create a 2-by-3 matrix on gpu 0 -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/storage/storage.cc:43: Please compile with CUDA enabled -``` - -```r -c$dim() -``` - -``` -## Error in c$dim: object of type 'builtin' is not subsettable -``` - -We can also initialize an `NDArray` object in various ways: +As a side note, normally for CUDA enabled devices, the device id of GPU starts from 0. +So that is why we passed in 0 to GPU id. We can also initialize an `NDArray` object in various ways: ```r @@ -102,15 +82,15 @@ You can perform elemental-wise operations on `NDArray` objects: ```r -a <- mx.nd.ones(c(2, 3)) * 2 +a <- mx.nd.ones(c(2, 4)) * 2 b <- mx.nd.ones(c(2, 4)) / 8 as.array(a) ``` ``` -## [,1] [,2] [,3] -## [1,] 2 2 2 -## [2,] 2 2 2 +## [,1] [,2] [,3] [,4] +## [1,] 2 2 2 2 +## [2,] 2 2 2 2 ``` ```r @@ -125,34 +105,24 @@ as.array(b) ```r c <- a + b -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/ndarray/./ndarray_function.h:20: Check failed: lshape == rshape operands shape mismatch -``` - -```r as.array(c) ``` ``` -## [1] 1 2 3 4 5 +## [,1] [,2] [,3] [,4] +## [1,] 2.125 2.125 2.125 2.125 +## [2,] 2.125 2.125 2.125 2.125 ``` ```r d <- c / a - 5 -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/ndarray/./ndarray_function.h:20: Check failed: lshape == rshape operands shape mismatch -``` - -```r as.array(d) ``` ``` -## Error in as.array(d): object 'd' not found +## [,1] [,2] [,3] [,4] +## [1,] -3.9375 -3.9375 -3.9375 -3.9375 +## [2,] -3.9375 -3.9375 -3.9375 -3.9375 ``` If two `NDArray`s sit on different divices, we need to explicitly move them @@ -162,59 +132,32 @@ into the same one. 
For instance: ```r a <- mx.nd.ones(c(2, 3)) * 2 b <- mx.nd.ones(c(2, 3), mx.gpu()) / 8 -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/storage/storage.cc:43: Please compile with CUDA enabled -``` - -```r c <- mx.nd.copyto(a, mx.gpu()) * b -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/storage/storage.cc:43: Please compile with CUDA enabled -``` - -```r as.array(c) ``` -``` -## [1] 1 2 3 4 5 -``` - #### Load and Save -You can save an `NDArray` object to your disk with `mx.nd.save`: +You can save a list of `NDArray` object to your disk with `mx.nd.save`: ```r a <- mx.nd.ones(c(2, 3)) -mx.nd.save(a, 'temp.ndarray') -``` - -``` -## Error in eval(expr, envir, enclos): could not convert using R function : as.list +mx.nd.save(list(a), "temp.ndarray") ``` You can also load it back easily: ```r -a <- mx.nd.load('temp.ndarray') -``` - -``` -## Error in eval(expr, envir, enclos): [15:41:37] src/io/local_filesys.cc:149: Check failed: allow_null LocalFileSystem: fail to open "temp.ndarray" -``` - -```r +a <- mx.nd.load("temp.ndarray") as.array(a[[1]]) ``` ``` -## Error in a[[1]]: object of type 'externalptr' is not subsettable +## [,1] [,2] [,3] +## [1,] 1 1 1 +## [2,] 1 1 1 ``` In case you want to save data to the distributed file system such as S3 and HDFS, @@ -222,8 +165,8 @@ we can directly save to and load from them. For example: ```r -mx.nd.save(a, 's3://mybucket/mydata.bin') -mx.nd.save(a, 'hdfs///users/myname/mydata.bin') +mx.nd.save(list(a), "s3://mybucket/mydata.bin") +mx.nd.save(list(a), "hdfs///users/myname/mydata.bin") ``` ### Automatic Parallelization @@ -264,7 +207,7 @@ the read and write dependency and find a best way to execute them in parallel. The actual computations are finished if we want to copy the results into some -other place, such as `as.array(a)` or `mx.nd.save(a, 'temp.dat')`. Therefore, if we +other place, such as `as.array(a)` or `mx.nd.save(a, "temp.dat")`. Therefore, if we want to write highly parallelized codes, we only need to postpone when we need the results. 
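As a minimal sketch of that pattern (CPU-only here, so it runs without CUDA; the same idea applies when some arrays live on a GPU), queue up the independent operations first and convert to R arrays only at the end:

```r
a <- mx.nd.ones(c(2, 3))
b <- mx.nd.ones(c(2, 3)) * 2
# The two updates below touch different arrays, so there is no read/write
# dependency between them and the engine is free to run them in parallel.
a <- a + 1
b <- b * 3
# Only these conversions block and wait for the actual results.
as.array(a)
as.array(b)
```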
@@ -279,11 +222,11 @@ The following codes create a two layer perceptrons network: ```r require(mxnet) -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) -net <- mx.symbol.Activation(data=net, name='relu1', act_type="relu") -net <- mx.symbol.FullyConnected(data=net, name='fc2', num_hidden=64) -net <- mx.symbol.Softmax(data=net, name='out') +net <- mx.symbol.Variable("data") +net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) +net <- mx.symbol.Activation(data=net, name="relu1", act_type="relu") +net <- mx.symbol.FullyConnected(data=net, name="fc2", num_hidden=64) +net <- mx.symbol.Softmax(data=net, name="out") class(net) ``` @@ -322,36 +265,28 @@ We can also specify the automatic generated names explicitly: ```r -net <- mx.symbol.Variable('data') -w <- mx.symbol.Variable('myweight') -net <- sym.FullyConnected(data=data, weight=w, name='fc1', num_hidden=128) -``` - -``` -## Error in eval(expr, envir, enclos): could not find function "sym.FullyConnected" -``` - -```r +data <- mx.symbol.Variable("data") +w <- mx.symbol.Variable("myweight") +net <- mx.symbol.FullyConnected(data=data, weight=w, name="fc1", num_hidden=128) arguments(net) ``` ``` -## [1] "data" +## [1] "data" "myweight" "fc1_bias" ``` ### More Complicated Composition -MXNet provides well-optimized symbols (see -[src/operator](https://github.com/dmlc/mxnet/tree/master/src/operator)) for +MXNet provides well-optimized symbols for commonly used layers in deep learning. We can also easily define new operators in python. The following example first performs an elementwise add between two symbols, then feed them to the fully connected operator. ```r -lhs <- mx.symbol.Variable('data1') -rhs <- mx.symbol.Variable('data2') -net <- mx.symbol.FullyConnected(data=lhs + rhs, name='fc1', num_hidden=128) +lhs <- mx.symbol.Variable("data1") +rhs <- mx.symbol.Variable("data2") +net <- mx.symbol.FullyConnected(data=lhs + rhs, name="fc1", num_hidden=128) arguments(net) ``` @@ -364,66 +299,27 @@ forward composition we addressed before. ```r -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=128) -net2 <- mx.symbol.Variable('data2') -net2 <- mx.symbol.FullyConnected(data=net2, name='net2', num_hidden=128) -composed_net <- net(data=net2, name='compose') -``` - -``` -## Error in eval(expr, envir, enclos): could not find function "net" -``` - -```r -arguments(composed_net) +net <- mx.symbol.Variable("data") +net <- mx.symbol.FullyConnected(data=net, name="fc1", num_hidden=128) +net2 <- mx.symbol.Variable("data2") +net2 <- mx.symbol.FullyConnected(data=net2, name="net2", num_hidden=128) +composed.net <- mx.apply(net, data=net2, name="compose") +arguments(composed.net) ``` ``` -## Error in inherits(x, "Rcpp_MXSymbol"): object 'composed_net' not found +## [1] "data2" "net2_weight" "net2_bias" "fc1_weight" "fc1_bias" ``` In the above example, *net* is used a function to apply to an existing symbol -*net*, the resulting *composed_net* will replace the original argument *data* by +*net*, the resulting *composed.net* will replace the original argument *data* by *net2* instead. -### Argument Shapes Inference - -Now we have known how to define the symbol. Next we can inference the shapes of -all the arguments it needed by given the input data shape. 
- - -```r -net <- mx.symbol.Variable('data') -net <- mx.symbol.FullyConnected(data=net, name='fc1', num_hidden=10) -``` - -The shape inference can be used as an earlier debugging mechanism to detect -shape inconsistency. - -### Bind the Symbols and Run - -Now we can bind the free variables of the symbol and perform forward and backward. -The bind function will create an ```Executor``` that can be used to carry out the real computations. - -For neural nets, a more commonly used pattern is ```simple_bind```, which will create -all the arguments arrays for you. Then you can call forward, and backward(if gradient is needed) -to get the gradient. - - -```r -A <- mx.symbol.Variable('A') -B <- mx.symbol.Variable('B') -C <- A * B - -texec <- mx.simple.bind(C) -texec.forward() -texec.backward() -``` +### Training a Neural Net. The [model API](../../R-package/R/model.R) is a thin wrapper around the symbolic executors to support neural net training. -You are also highly encouraged to read [Symbolic Configuration and Execution in Pictures](symbol_in_pictures.md), +You are also highly encouraged to read [Symbolic Configuration and Execution in Pictures for python package](../python/symbol_in_pictures.md), which provides a detailed explanation of concepts in pictures. ### How Efficient is Symbolic API @@ -441,14 +337,3 @@ extremely efficient. We also provide fine grained operators for more flexible composition. Because we are also doing more inplace memory allocation, mxnet can be ***more memory efficient*** than cxxnet, and gets to same runtime, with greater flexiblity. - - - - - - - - - - - From 36a4ada48d12e6b4080c44bd169c147ed409084b Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 18 Oct 2015 15:17:18 -0700 Subject: [PATCH 7/8] Update index.md --- doc/R-package/index.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/R-package/index.md b/doc/R-package/index.md index 055fd2284e06..966b9ed4b5b3 100644 --- a/doc/R-package/index.md +++ b/doc/R-package/index.md @@ -1,5 +1,6 @@ -MXNet R Package -=============== + Deep Learning for R +========================== + You have find MXNet R Package! The MXNet R packages brings flexible and efficient GPU computing and state-of-art deep learning to R. From fd1525dadd548b380abc2023958ccba8ba32a328 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Sun, 18 Oct 2015 15:57:06 -0700 Subject: [PATCH 8/8] Update index.md --- doc/R-package/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/R-package/index.md b/doc/R-package/index.md index 966b9ed4b5b3..629e6f997837 100644 --- a/doc/R-package/index.md +++ b/doc/R-package/index.md @@ -1,5 +1,5 @@ - Deep Learning for R -========================== +MXNet R Packge: Deep Learning for R +=================================== You have find MXNet R Package! The MXNet R packages brings flexible and efficient GPU computing and state-of-art deep learning to R.