From 808ca94c85a718e9b30072f826b0fe475f233f0d Mon Sep 17 00:00:00 2001
From: Anirudh Acharya <aanirud@amazon.com>
Date: Tue, 21 Aug 2018 13:25:31 -0700
Subject: [PATCH 1/3] Fix CGAN R Tutorial with MNIST dataset

---
 example/gan/CGAN_mnist_R/CGAN_mnist_setup.R |  4 ++--
 example/gan/CGAN_mnist_R/CGAN_train.R       | 25 +++++++++++++--------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R b/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
index ad57bc541230..eff7f90fd196 100644
--- a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
+++ b/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
@@ -55,7 +55,7 @@ batch_size<- 64
 data = mx.symbol.Variable('data')
 
 gen_rand<- mx.symbol.normal(loc=0, scale=1, shape=c(1, 1, random_dim, batch_size), name="gen_rand")
-gen_concat<- mx.symbol.Concat(data = list(data, gen_rand), num.args = 2, name="gen_concat")
+gen_concat<- mx.symbol.concat(data = list(data, gen_rand), num.args = 2, name="gen_concat")
 
 g1 = mx.symbol.Deconvolution(gen_concat, name='g1', kernel=c(4,4), num_filter=gen_features*4, no_bias=T)
 gbn1 = mx.symbol.BatchNorm(g1, name='gbn1', fix_gamma=fix_gamma, eps=eps)
@@ -83,7 +83,7 @@ label = mx.symbol.Variable('label')
 dis_digit<- mx.symbol.Reshape(data=dis_digit, shape=c(1,1,10,batch_size), name="digit_reshape")
 dis_digit<- mx.symbol.broadcast_to(data=dis_digit, shape=c(28,28,10, batch_size), name="digit_broadcast")
 
-data_concat <- mx.symbol.Concat(list(data, dis_digit), num.args = 2, dim = 1, name='dflat_concat')
+data_concat <- mx.symbol.concat(list(data, dis_digit), num.args = 2, dim = 1, name='dflat_concat')
 
 d1 = mx.symbol.Convolution(data=data_concat, name='d1', kernel=c(3,3), stride=c(1,1), pad=c(0,0), num_filter=24, no_bias=no_bias)
 dbn1 = mx.symbol.BatchNorm(d1, name='dbn1', fix_gamma=fix_gamma, eps=eps)
diff --git a/example/gan/CGAN_mnist_R/CGAN_train.R b/example/gan/CGAN_mnist_R/CGAN_train.R
index 9c7649f3e269..ad91b3df7421 100644
--- a/example/gan/CGAN_mnist_R/CGAN_train.R
+++ b/example/gan/CGAN_mnist_R/CGAN_train.R
@@ -19,6 +19,7 @@
 ### Training module for GAN
 #####################################################
 
+# Change this to mx.gpu() when running on gpu machine.
 devices<- mx.cpu()
 
 data_shape_G<- c(1, 1, 10, batch_size)
@@ -46,11 +47,11 @@ exec_D<- mx.simple.bind(symbol = D_sym, data=data_shape_D, digit=digit_shape_D,
 ### initialize parameters - To Do - personalise each layer
 initializer<- mx.init.Xavier(rnd_type = "gaussian", factor_type = "avg", magnitude = 3)
 
-arg_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$arg.shapes, ctx = mx.cpu())
-aux_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$aux.shapes, ctx = mx.cpu())
+arg_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$arg.shapes, ctx = devices)
+aux_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$aux.shapes, ctx = devices)
 
-arg_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$arg.shapes, ctx = mx.cpu())
-aux_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$aux.shapes, ctx = mx.cpu())
+arg_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$arg.shapes, ctx = devices)
+aux_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$aux.shapes, ctx = devices)
 
 mx.exec.update.arg.arrays(exec_G, arg_param_ini_G, match.name=TRUE)
 mx.exec.update.aux.arrays(exec_G, aux_param_ini_G, match.name=TRUE)
@@ -71,7 +72,7 @@ optimizer_G<-mx.opt.create(name = "adadelta",
                            rescale.grad=1/batch_size, 
                            clip_gradient=1)
 
-updater_G<- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays)
+updater_G<- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays, ctx = devices)
 
 optimizer_D<-mx.opt.create(name = "adadelta",
                            rho=0.92, 
@@ -79,7 +80,8 @@ optimizer_D<-mx.opt.create(name = "adadelta",
                            wd=0, 
                            rescale.grad=1/batch_size, 
                            clip_gradient=1)
-updater_D<- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays)
+
+updater_D<- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays, ctx = devices)
 
 ####################################
 #initialize metric
@@ -121,8 +123,10 @@ for (iteration in 1:2400) {
   update_args_D<- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
   mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
   
-  metric_D_value <- metric_D$update(label = mx.nd.array(rep(0, batch_size)), exec_D$ref.outputs[["D_sym_output"]], metric_D_value)
-  
+  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(0, batch_size))),
+                                    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]),
+                                    metric_D_value)
+
   ### Train loop on real
   mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_real, digit=D_digit_real, label=mx.nd.array(rep(1, batch_size))), match.name=TRUE)
   mx.exec.forward(exec_D, is.train=T)
@@ -130,7 +134,9 @@ for (iteration in 1:2400) {
   update_args_D<- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
   mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
   
-  metric_D_value <- metric_D$update(mx.nd.array(rep(1, batch_size)), exec_D$ref.outputs[["D_sym_output"]], metric_D_value)
+  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(1, batch_size))),
+                                    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]),
+                                    metric_D_value)
   
   ### Update Generator weights - use a seperate executor for writing data gradients
   exec_D_back<- mxnet:::mx.symbol.bind(symbol = D_sym, arg.arrays = exec_D$arg.arrays, aux.arrays = exec_D$aux.arrays, grad.reqs = rep("write", length(exec_D$arg.arrays)), ctx = devices)
@@ -167,6 +173,7 @@ for (iteration in 1:2400) {
   }
 }
 
+dir.create(file.path(".", "models"))
 mx.symbol.save(D_sym, filename = "models/D_sym_model_v1.json")
 mx.nd.save(exec_D$arg.arrays, filename = "models/D_aux_params_v1.params")
 mx.nd.save(exec_D$aux.arrays, filename = "models/D_aux_params_v1.params")

From 5dd0ff4a5cd59b4e59d13fc9aee11f4d719903df Mon Sep 17 00:00:00 2001
From: Anirudh Acharya <aanirud@amazon.com>
Date: Tue, 21 Aug 2018 14:47:04 -0700
Subject: [PATCH 2/3] fix nit issues

---
 example/gan/CGAN_mnist_R/CGAN_mnist_setup.R |  49 +++++----
 example/gan/CGAN_mnist_R/CGAN_train.R       | 106 ++++++++++++--------
 2 files changed, 93 insertions(+), 62 deletions(-)

diff --git a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R b/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
index eff7f90fd196..da3dd4d4e4f8 100644
--- a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
+++ b/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
@@ -28,7 +28,7 @@ source("iterators.R")
 ###   https://www.kaggle.com/c/digit-recognizer/data
 ######################################################
 train <- read_csv('data/train.csv')
-train<- data.matrix(train)
+train <- data.matrix(train)
 
 train_data <- train[,-1]
 train_data <- t(train_data/255*2-1)
@@ -39,14 +39,14 @@ dim(train_data) <- c(28, 28, 1, ncol(train_data))
 ##################################################
 #### Model parameters
 ##################################################
-random_dim<- 96
-gen_features<- 96
-dis_features<- 32
+random_dim <- 96
+gen_features <- 96
+dis_features <- 32
 image_depth = 1
-fix_gamma<- T
-no_bias<- T
-eps<- 1e-5 + 1e-12
-batch_size<- 64
+fix_gamma <- T
+no_bias <- T
+eps <- 1e-5 + 1e-12
+batch_size <- 64
 
 
 ##################################################
@@ -54,22 +54,25 @@ batch_size<- 64
 ##################################################
 data = mx.symbol.Variable('data')
 
-gen_rand<- mx.symbol.normal(loc=0, scale=1, shape=c(1, 1, random_dim, batch_size), name="gen_rand")
-gen_concat<- mx.symbol.concat(data = list(data, gen_rand), num.args = 2, name="gen_concat")
+gen_rand <- mx.symbol.normal(loc=0, scale=1, shape=c(1, 1, random_dim, batch_size), name="gen_rand")
+gen_concat <- mx.symbol.concat(data = list(data, gen_rand), num.args = 2, name="gen_concat")
 
 g1 = mx.symbol.Deconvolution(gen_concat, name='g1', kernel=c(4,4), num_filter=gen_features*4, no_bias=T)
 gbn1 = mx.symbol.BatchNorm(g1, name='gbn1', fix_gamma=fix_gamma, eps=eps)
 gact1 = mx.symbol.Activation(gbn1, name='gact1', act_type='relu')
 
-g2 = mx.symbol.Deconvolution(gact1, name='g2', kernel=c(3,3), stride=c(2,2), pad=c(1,1), num_filter=gen_features*2, no_bias=no_bias)
+g2 = mx.symbol.Deconvolution(gact1, name='g2', kernel=c(3,3), stride=c(2,2), pad=c(1,1),
+                             num_filter=gen_features*2, no_bias=no_bias)
 gbn2 = mx.symbol.BatchNorm(g2, name='gbn2', fix_gamma=fix_gamma, eps=eps)
 gact2 = mx.symbol.Activation(gbn2, name='gact2', act_type='relu')
 
-g3 = mx.symbol.Deconvolution(gact2, name='g3', kernel=c(4,4), stride=c(2,2), pad=c(1,1), num_filter=gen_features, no_bias=no_bias)
+g3 = mx.symbol.Deconvolution(gact2, name='g3', kernel=c(4,4), stride=c(2,2), pad=c(1,1),
+                             num_filter=gen_features, no_bias=no_bias)
 gbn3 = mx.symbol.BatchNorm(g3, name='gbn3', fix_gamma=fix_gamma, eps=eps)
 gact3 = mx.symbol.Activation(gbn3, name='gact3', act_type='relu')
 
-g4 = mx.symbol.Deconvolution(gact3, name='g4', kernel=c(4,4), stride=c(2,2), pad=c(1,1), num_filter=image_depth, no_bias=no_bias)
+g4 = mx.symbol.Deconvolution(gact3, name='g4', kernel=c(4,4), stride=c(2,2), pad=c(1,1),
+                             num_filter=image_depth, no_bias=no_bias)
 G_sym = mx.symbol.Activation(g4, name='G_sym', act_type='tanh')
 
 
@@ -80,25 +83,29 @@ data = mx.symbol.Variable('data')
 dis_digit = mx.symbol.Variable('digit')
 label = mx.symbol.Variable('label')
 
-dis_digit<- mx.symbol.Reshape(data=dis_digit, shape=c(1,1,10,batch_size), name="digit_reshape")
-dis_digit<- mx.symbol.broadcast_to(data=dis_digit, shape=c(28,28,10, batch_size), name="digit_broadcast")
+dis_digit <- mx.symbol.Reshape(data=dis_digit, shape=c(1,1,10,batch_size), name="digit_reshape")
+dis_digit <- mx.symbol.broadcast_to(data=dis_digit, shape=c(28,28,10, batch_size), name="digit_broadcast")
 
 data_concat <- mx.symbol.concat(list(data, dis_digit), num.args = 2, dim = 1, name='dflat_concat')
 
-d1 = mx.symbol.Convolution(data=data_concat, name='d1', kernel=c(3,3), stride=c(1,1), pad=c(0,0), num_filter=24, no_bias=no_bias)
+d1 = mx.symbol.Convolution(data=data_concat, name='d1', kernel=c(3,3), stride=c(1,1), pad=c(0,0),
+                           num_filter=24, no_bias=no_bias)
 dbn1 = mx.symbol.BatchNorm(d1, name='dbn1', fix_gamma=fix_gamma, eps=eps)
 dact1 = mx.symbol.LeakyReLU(dbn1, name='dact1', act_type='elu', slope=0.25)
 pool1 <- mx.symbol.Pooling(data=dact1, name="pool1", pool_type="max", kernel=c(2,2), stride=c(2,2), pad=c(0,0))
 
-d2 = mx.symbol.Convolution(pool1, name='d2', kernel=c(3,3), stride=c(2,2), pad=c(0,0), num_filter=32, no_bias=no_bias)
+d2 = mx.symbol.Convolution(pool1, name='d2', kernel=c(3,3), stride=c(2,2), pad=c(0,0), num_filter=32,
+                           no_bias=no_bias)
 dbn2 = mx.symbol.BatchNorm(d2, name='dbn2', fix_gamma=fix_gamma, eps=eps)
 dact2 = mx.symbol.LeakyReLU(dbn2, name='dact2', act_type='elu', slope=0.25)
 
-d3 = mx.symbol.Convolution(dact2, name='d3', kernel=c(3,3), stride=c(1,1), pad=c(0,0), num_filter=64, no_bias=no_bias)
+d3 = mx.symbol.Convolution(dact2, name='d3', kernel=c(3,3), stride=c(1,1), pad=c(0,0), num_filter=64,
+                           no_bias=no_bias)
 dbn3 = mx.symbol.BatchNorm(d3, name='dbn3', fix_gamma=fix_gamma, eps=eps)
 dact3 = mx.symbol.LeakyReLU(dbn3, name='dact3', act_type='elu', slope=0.25)
 
-d4 = mx.symbol.Convolution(dact2, name='d3', kernel=c(4,4), stride=c(1,1), pad=c(0,0), num_filter=64, no_bias=no_bias)
+d4 = mx.symbol.Convolution(dact2, name='d3', kernel=c(4,4), stride=c(1,1), pad=c(0,0), num_filter=64,
+                           no_bias=no_bias)
 dbn4 = mx.symbol.BatchNorm(d4, name='dbn4', fix_gamma=fix_gamma, eps=eps)
 dact4 = mx.symbol.LeakyReLU(dbn4, name='dact4', act_type='elu', slope=0.25)
 
@@ -113,8 +120,8 @@ D_sym = mx.symbol.LogisticRegressionOutput(data=dfc, label=label, name='D_sym')
 ########################
 ### Graph
 ########################
-input_shape_G<- c(1, 1, 10, batch_size)
-input_shape_D<- c(28, 28, 1, batch_size)
+input_shape_G <- c(1, 1, 10, batch_size)
+input_shape_D <- c(28, 28, 1, batch_size)
 
 graph.viz(G_sym, type = "graph", direction = "LR")
 graph.viz(D_sym, type = "graph", direction = "LR")
diff --git a/example/gan/CGAN_mnist_R/CGAN_train.R b/example/gan/CGAN_mnist_R/CGAN_train.R
index ad91b3df7421..5cee52672e82 100644
--- a/example/gan/CGAN_mnist_R/CGAN_train.R
+++ b/example/gan/CGAN_mnist_R/CGAN_train.R
@@ -20,11 +20,11 @@
 #####################################################
 
 # Change this to mx.gpu() when running on gpu machine.
-devices<- mx.cpu()
+devices <- mx.cpu()
 
-data_shape_G<- c(1, 1, 10, batch_size)
-data_shape_D<- c(28, 28, 1, batch_size)
-digit_shape_D<- c(10, batch_size)
+data_shape_G <- c(1, 1, 10, batch_size)
+data_shape_D <- c(28, 28, 1, batch_size)
+digit_shape_D <- c(10, batch_size)
 
 mx.metric.binacc <- mx.metric.custom("binacc", function(label, pred) {
   res <- mean(label==round(pred))
@@ -38,20 +38,35 @@ mx.metric.logloss <- mx.metric.custom("logloss", function(label, pred) {
 
 ##############################################
 ### Define iterators
-iter_G<- G_iterator(batch_size = batch_size)
-iter_D<- D_iterator(batch_size = batch_size)
+iter_G <- G_iterator(batch_size = batch_size)
+iter_D <- D_iterator(batch_size = batch_size)
 
-exec_G<- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices, grad.req = "write")
-exec_D<- mx.simple.bind(symbol = D_sym, data=data_shape_D, digit=digit_shape_D, ctx = devices, grad.req = "write")
+exec_G <- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices,
+                        grad.req = "write")
+exec_D <- mx.simple.bind(symbol = D_sym, data=data_shape_D, digit=digit_shape_D,
+                        ctx = devices, grad.req = "write")
 
 ### initialize parameters - To Do - personalise each layer
-initializer<- mx.init.Xavier(rnd_type = "gaussian", factor_type = "avg", magnitude = 3)
-
-arg_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$arg.shapes, ctx = devices)
-aux_param_ini_G<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, data=data_shape_G)$aux.shapes, ctx = devices)
-
-arg_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$arg.shapes, ctx = devices)
-aux_param_ini_D<- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D, digit=digit_shape_D)$aux.shapes, ctx = devices)
+initializer <- mx.init.Xavier(rnd_type = "gaussian", factor_type = "avg", magnitude = 3)
+
+arg_param_ini_G <- mx.init.create(initializer = initializer,
+                                  shape.array = mx.symbol.infer.shape(G_sym,
+                                                                      data=data_shape_G)$arg.shapes,
+                                  ctx = devices)
+aux_param_ini_G <- mx.init.create(initializer = initializer,
+                                  shape.array = mx.symbol.infer.shape(G_sym,
+                                                                      data=data_shape_G)$aux.shapes,
+                                  ctx = devices)
+
+arg_param_ini_D <- mx.init.create(initializer = initializer,
+                                  shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D,
+                                                                      digit=digit_shape_D)$arg.shapes,
+                                  ctx = devices)
+
+aux_param_ini_D <- mx.init.create(initializer = initializer,
+                                  shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D,
+                                                                      digit=digit_shape_D)$aux.shapes,
+                                  ctx = devices)
 
 mx.exec.update.arg.arrays(exec_G, arg_param_ini_G, match.name=TRUE)
 mx.exec.update.aux.arrays(exec_G, aux_param_ini_G, match.name=TRUE)
@@ -65,33 +80,33 @@ input_names_D <- mxnet:::mx.model.check.arguments(D_sym)
 
 ###################################################
 #initialize optimizers
-optimizer_G<-mx.opt.create(name = "adadelta",
+optimizer_G <- mx.opt.create(name = "adadelta",
                            rho=0.92, 
                            epsilon = 1e-6, 
                            wd=0, 
                            rescale.grad=1/batch_size, 
                            clip_gradient=1)
 
-updater_G<- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays, ctx = devices)
+updater_G <- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays, ctx = devices)
 
-optimizer_D<-mx.opt.create(name = "adadelta",
+optimizer_D <- mx.opt.create(name = "adadelta",
                            rho=0.92, 
                            epsilon = 1e-6, 
                            wd=0, 
                            rescale.grad=1/batch_size, 
                            clip_gradient=1)
 
-updater_D<- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays, ctx = devices)
+updater_D <- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays, ctx = devices)
 
 ####################################
 #initialize metric
-metric_G<- mx.metric.binacc
-metric_G_value<- metric_G$init()
+metric_G <- mx.metric.binacc
+metric_G_value <- metric_G$init()
 
-metric_D<- mx.metric.binacc
-metric_D_value<- metric_D$init()
+metric_D <- mx.metric.binacc
+metric_D_value <- metric_D$init()
 
-iteration<- 1
+iteration <- 1
 iter_G$reset()
 iter_D$reset()
 
@@ -117,10 +132,12 @@ for (iteration in 1:2400) {
   D_digit_real <- D_values$digit
   
   ### Train loop on fake
-  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_fake, digit=D_digit_fake, label=mx.nd.array(rep(0, batch_size))), match.name=TRUE)
+  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_fake, digit=D_digit_fake,
+                                                      label=mx.nd.array(rep(0, batch_size))),
+                            match.name=TRUE)
   mx.exec.forward(exec_D, is.train=T)
   mx.exec.backward(exec_D)
-  update_args_D<- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
+  update_args_D <- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
   mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
   
   metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(0, batch_size))),
@@ -128,10 +145,12 @@ for (iteration in 1:2400) {
                                     metric_D_value)
 
   ### Train loop on real
-  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_real, digit=D_digit_real, label=mx.nd.array(rep(1, batch_size))), match.name=TRUE)
+  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_real, digit=D_digit_real,
+                                                      label=mx.nd.array(rep(1, batch_size))),
+                            match.name=TRUE)
   mx.exec.forward(exec_D, is.train=T)
   mx.exec.backward(exec_D)
-  update_args_D<- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
+  update_args_D <- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
   mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
   
   metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(1, batch_size))),
@@ -139,14 +158,19 @@ for (iteration in 1:2400) {
                                     metric_D_value)
   
   ### Update Generator weights - use a seperate executor for writing data gradients
-  exec_D_back<- mxnet:::mx.symbol.bind(symbol = D_sym, arg.arrays = exec_D$arg.arrays, aux.arrays = exec_D$aux.arrays, grad.reqs = rep("write", length(exec_D$arg.arrays)), ctx = devices)
-  mx.exec.update.arg.arrays(exec_D_back, arg.arrays = list(data=D_data_fake, digit=D_digit_fake, label=mx.nd.array(rep(1, batch_size))), match.name=TRUE)
+  exec_D_back <- mxnet:::mx.symbol.bind(symbol = D_sym, arg.arrays = exec_D$arg.arrays,
+                                       aux.arrays = exec_D$aux.arrays,
+                                       grad.reqs = rep("write", length(exec_D$arg.arrays)),
+                                       ctx = devices)
+  mx.exec.update.arg.arrays(exec_D_back, arg.arrays = list(data=D_data_fake, digit=D_digit_fake,
+                                                           label=mx.nd.array(rep(1, batch_size))),
+                            match.name=TRUE)
   mx.exec.forward(exec_D_back, is.train=T)
   mx.exec.backward(exec_D_back)
-  D_grads<- exec_D_back$ref.grad.arrays$data
+  D_grads <- exec_D_back$ref.grad.arrays$data
   mx.exec.backward(exec_G, out_grads=D_grads)
   
-  update_args_G<- updater_G(weight = exec_G$ref.arg.arrays, grad = exec_G$ref.grad.arrays)
+  update_args_G <- updater_G(weight = exec_G$ref.arg.arrays, grad = exec_G$ref.grad.arrays)
   mx.exec.update.arg.arrays(exec_G, update_args_G, skip.null=TRUE)
   
   ### Update metrics
@@ -159,7 +183,7 @@ for (iteration in 1:2400) {
   
   if (iteration==1 | iteration %% 100==0){
     
-    metric_D_value<- metric_D$init()
+    metric_D_value <- metric_D$init()
     
     par(mfrow=c(3,3), mar=c(0.1,0.1,0.1,0.1))
     for (i in 1:9) {
@@ -173,7 +197,7 @@ for (iteration in 1:2400) {
   }
 }
 
-dir.create(file.path(".", "models"))
+ifelse(!dir.exists(file.path(".", "models")), dir.create(file.path(".", "models")), "Folder already exists")
 mx.symbol.save(D_sym, filename = "models/D_sym_model_v1.json")
 mx.nd.save(exec_D$arg.arrays, filename = "models/D_aux_params_v1.params")
 mx.nd.save(exec_D$aux.arrays, filename = "models/D_aux_params_v1.params")
@@ -184,15 +208,15 @@ mx.nd.save(exec_G$aux.arrays, filename = "models/G_aux_params_v1.params")
 
 
 ### Inference
-G_sym<- mx.symbol.load("models/G_sym_model_v1.json")
-G_arg_params<- mx.nd.load("models/G_arg_params_v1.params")
-G_aux_params<- mx.nd.load("models/G_aux_params_v1.params")
+G_sym <- mx.symbol.load("models/G_sym_model_v1.json")
+G_arg_params <- mx.nd.load("models/G_arg_params_v1.params")
+G_aux_params <- mx.nd.load("models/G_aux_params_v1.params")
 
-digit<- mx.nd.array(rep(9, times=batch_size))
-data<- mx.nd.one.hot(indices = digit, depth = 10)
-data<- mx.nd.reshape(data = data, shape = c(1,1,-1, batch_size))
+digit <- mx.nd.array(rep(9, times=batch_size))
+data <- mx.nd.one.hot(indices = digit, depth = 10)
+data <- mx.nd.reshape(data = data, shape = c(1,1,-1, batch_size))
 
-exec_G<- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices, grad.req = "null")
+exec_G <- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices, grad.req = "null")
 mx.exec.update.arg.arrays(exec_G, G_arg_params, match.name=TRUE)
 mx.exec.update.arg.arrays(exec_G, list(data=data), match.name=TRUE)
 mx.exec.update.aux.arrays(exec_G, G_aux_params, match.name=TRUE)

From 018957d90be420c4f769ff4c973aad482f67cc7c Mon Sep 17 00:00:00 2001
From: Anirudh Acharya <aanirud@amazon.com>
Date: Thu, 23 Aug 2018 16:38:25 -0700
Subject: [PATCH 3/3] format using formatR

---
 example/gan/CGAN_mnist_R/CGAN_mnist_setup.R | 128 ---------
 example/gan/CGAN_mnist_R/CGAN_train.R       | 283 +++++++++++++-------
 example/gan/CGAN_mnist_R/iterators.R        |  64 ++---
 3 files changed, 217 insertions(+), 258 deletions(-)
 delete mode 100644 example/gan/CGAN_mnist_R/CGAN_mnist_setup.R

diff --git a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R b/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
deleted file mode 100644
index da3dd4d4e4f8..000000000000
--- a/example/gan/CGAN_mnist_R/CGAN_mnist_setup.R
+++ /dev/null
@@ -1,128 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-require("imager")
-require("dplyr")
-require("readr")
-require("mxnet")
-
-source("iterators.R")
-
-######################################################
-### Data import and preperation
-### First download MNIST train data at Kaggle: 
-###   https://www.kaggle.com/c/digit-recognizer/data
-######################################################
-train <- read_csv('data/train.csv')
-train <- data.matrix(train)
-
-train_data <- train[,-1]
-train_data <- t(train_data/255*2-1)
-train_label <- as.integer(train[,1])
-
-dim(train_data) <- c(28, 28, 1, ncol(train_data))
-
-##################################################
-#### Model parameters
-##################################################
-random_dim <- 96
-gen_features <- 96
-dis_features <- 32
-image_depth = 1
-fix_gamma <- T
-no_bias <- T
-eps <- 1e-5 + 1e-12
-batch_size <- 64
-
-
-##################################################
-#### Generator Symbol
-##################################################
-data = mx.symbol.Variable('data')
-
-gen_rand <- mx.symbol.normal(loc=0, scale=1, shape=c(1, 1, random_dim, batch_size), name="gen_rand")
-gen_concat <- mx.symbol.concat(data = list(data, gen_rand), num.args = 2, name="gen_concat")
-
-g1 = mx.symbol.Deconvolution(gen_concat, name='g1', kernel=c(4,4), num_filter=gen_features*4, no_bias=T)
-gbn1 = mx.symbol.BatchNorm(g1, name='gbn1', fix_gamma=fix_gamma, eps=eps)
-gact1 = mx.symbol.Activation(gbn1, name='gact1', act_type='relu')
-
-g2 = mx.symbol.Deconvolution(gact1, name='g2', kernel=c(3,3), stride=c(2,2), pad=c(1,1),
-                             num_filter=gen_features*2, no_bias=no_bias)
-gbn2 = mx.symbol.BatchNorm(g2, name='gbn2', fix_gamma=fix_gamma, eps=eps)
-gact2 = mx.symbol.Activation(gbn2, name='gact2', act_type='relu')
-
-g3 = mx.symbol.Deconvolution(gact2, name='g3', kernel=c(4,4), stride=c(2,2), pad=c(1,1),
-                             num_filter=gen_features, no_bias=no_bias)
-gbn3 = mx.symbol.BatchNorm(g3, name='gbn3', fix_gamma=fix_gamma, eps=eps)
-gact3 = mx.symbol.Activation(gbn3, name='gact3', act_type='relu')
-
-g4 = mx.symbol.Deconvolution(gact3, name='g4', kernel=c(4,4), stride=c(2,2), pad=c(1,1),
-                             num_filter=image_depth, no_bias=no_bias)
-G_sym = mx.symbol.Activation(g4, name='G_sym', act_type='tanh')
-
-
-##################################################
-#### Discriminator Symbol
-##################################################
-data = mx.symbol.Variable('data')
-dis_digit = mx.symbol.Variable('digit')
-label = mx.symbol.Variable('label')
-
-dis_digit <- mx.symbol.Reshape(data=dis_digit, shape=c(1,1,10,batch_size), name="digit_reshape")
-dis_digit <- mx.symbol.broadcast_to(data=dis_digit, shape=c(28,28,10, batch_size), name="digit_broadcast")
-
-data_concat <- mx.symbol.concat(list(data, dis_digit), num.args = 2, dim = 1, name='dflat_concat')
-
-d1 = mx.symbol.Convolution(data=data_concat, name='d1', kernel=c(3,3), stride=c(1,1), pad=c(0,0),
-                           num_filter=24, no_bias=no_bias)
-dbn1 = mx.symbol.BatchNorm(d1, name='dbn1', fix_gamma=fix_gamma, eps=eps)
-dact1 = mx.symbol.LeakyReLU(dbn1, name='dact1', act_type='elu', slope=0.25)
-pool1 <- mx.symbol.Pooling(data=dact1, name="pool1", pool_type="max", kernel=c(2,2), stride=c(2,2), pad=c(0,0))
-
-d2 = mx.symbol.Convolution(pool1, name='d2', kernel=c(3,3), stride=c(2,2), pad=c(0,0), num_filter=32,
-                           no_bias=no_bias)
-dbn2 = mx.symbol.BatchNorm(d2, name='dbn2', fix_gamma=fix_gamma, eps=eps)
-dact2 = mx.symbol.LeakyReLU(dbn2, name='dact2', act_type='elu', slope=0.25)
-
-d3 = mx.symbol.Convolution(dact2, name='d3', kernel=c(3,3), stride=c(1,1), pad=c(0,0), num_filter=64,
-                           no_bias=no_bias)
-dbn3 = mx.symbol.BatchNorm(d3, name='dbn3', fix_gamma=fix_gamma, eps=eps)
-dact3 = mx.symbol.LeakyReLU(dbn3, name='dact3', act_type='elu', slope=0.25)
-
-d4 = mx.symbol.Convolution(dact2, name='d3', kernel=c(4,4), stride=c(1,1), pad=c(0,0), num_filter=64,
-                           no_bias=no_bias)
-dbn4 = mx.symbol.BatchNorm(d4, name='dbn4', fix_gamma=fix_gamma, eps=eps)
-dact4 = mx.symbol.LeakyReLU(dbn4, name='dact4', act_type='elu', slope=0.25)
-
-# pool4 <- mx.symbol.Pooling(data=dact3, name="pool4", pool_type="avg", kernel=c(4,4), stride=c(1,1), pad=c(0,0))
-
-dflat = mx.symbol.Flatten(dact4, name="dflat")
-
-dfc <- mx.symbol.FullyConnected(data=dflat, name="dfc", num_hidden=1, no_bias=F)
-D_sym = mx.symbol.LogisticRegressionOutput(data=dfc, label=label, name='D_sym')
-
-
-########################
-### Graph
-########################
-input_shape_G <- c(1, 1, 10, batch_size)
-input_shape_D <- c(28, 28, 1, batch_size)
-
-graph.viz(G_sym, type = "graph", direction = "LR")
-graph.viz(D_sym, type = "graph", direction = "LR")
-
diff --git a/example/gan/CGAN_mnist_R/CGAN_train.R b/example/gan/CGAN_mnist_R/CGAN_train.R
index 5cee52672e82..7d3225483c02 100644
--- a/example/gan/CGAN_mnist_R/CGAN_train.R
+++ b/example/gan/CGAN_mnist_R/CGAN_train.R
@@ -15,9 +15,116 @@
 # specific language governing permissions and limitations
 # under the License.
 
-#####################################################
+require("imager")
+require("dplyr")
+require("readr")
+require("mxnet")
+
+source("iterators.R")
+
+### Data import and preperation 
+# First download MNIST train data at Kaggle:
+# https://www.kaggle.com/c/digit-recognizer/data
+
+train <- read_csv("data/train.csv")
+train <- data.matrix(train)
+
+train_data <- train[, -1]
+train_data <- t(train_data/255 * 2 - 1)
+train_label <- as.integer(train[, 1])
+
+dim(train_data) <- c(28, 28, 1, ncol(train_data))
+
+### Model parameters
+random_dim <- 96
+gen_features <- 96
+dis_features <- 32
+image_depth <- 1
+fix_gamma <- T
+no_bias <- T
+eps <- 1e-05 + 1e-12
+batch_size <- 64
+
+
+### Generator Symbol
+data <- mx.symbol.Variable("data")
+
+gen_rand <- mx.symbol.normal(loc = 0, scale = 1, shape = c(1, 1, random_dim, batch_size), 
+  name = "gen_rand")
+gen_concat <- mx.symbol.concat(data = list(data, gen_rand), num.args = 2, name = "gen_concat")
+
+g1 <- mx.symbol.Deconvolution(gen_concat, name = "g1", kernel = c(4, 4), num_filter = gen_features * 
+  4, no_bias = T)
+gbn1 <- mx.symbol.BatchNorm(g1, name = "gbn1", fix_gamma = fix_gamma, eps = eps)
+gact1 <- mx.symbol.Activation(gbn1, name = "gact1", act_type = "relu")
+
+g2 <- mx.symbol.Deconvolution(gact1, name = "g2", kernel = c(3, 3), stride = c(2, 
+  2), pad = c(1, 1), num_filter = gen_features * 2, no_bias = no_bias)
+gbn2 <- mx.symbol.BatchNorm(g2, name = "gbn2", fix_gamma = fix_gamma, eps = eps)
+gact2 <- mx.symbol.Activation(gbn2, name = "gact2", act_type = "relu")
+
+g3 <- mx.symbol.Deconvolution(gact2, name = "g3", kernel = c(4, 4), stride = c(2, 
+  2), pad = c(1, 1), num_filter = gen_features, no_bias = no_bias)
+gbn3 <- mx.symbol.BatchNorm(g3, name = "gbn3", fix_gamma = fix_gamma, eps = eps)
+gact3 <- mx.symbol.Activation(gbn3, name = "gact3", act_type = "relu")
+
+g4 <- mx.symbol.Deconvolution(gact3, name = "g4", kernel = c(4, 4), stride = c(2, 
+  2), pad = c(1, 1), num_filter = image_depth, no_bias = no_bias)
+G_sym <- mx.symbol.Activation(g4, name = "G_sym", act_type = "tanh")
+
+
+### Discriminator Symbol
+data <- mx.symbol.Variable("data")
+dis_digit <- mx.symbol.Variable("digit")
+label <- mx.symbol.Variable("label")
+
+dis_digit <- mx.symbol.Reshape(data = dis_digit, shape = c(1, 1, 10, batch_size), 
+  name = "digit_reshape")
+dis_digit <- mx.symbol.broadcast_to(data = dis_digit, shape = c(28, 28, 10, batch_size), 
+  name = "digit_broadcast")
+
+data_concat <- mx.symbol.concat(list(data, dis_digit), num.args = 2, dim = 1, name = "dflat_concat")
+
+d1 <- mx.symbol.Convolution(data = data_concat, name = "d1", kernel = c(3, 3), stride = c(1, 
+  1), pad = c(0, 0), num_filter = 24, no_bias = no_bias)
+dbn1 <- mx.symbol.BatchNorm(d1, name = "dbn1", fix_gamma = fix_gamma, eps = eps)
+dact1 <- mx.symbol.LeakyReLU(dbn1, name = "dact1", act_type = "elu", slope = 0.25)
+pool1 <- mx.symbol.Pooling(data = dact1, name = "pool1", pool_type = "max", kernel = c(2, 
+  2), stride = c(2, 2), pad = c(0, 0))
+
+d2 <- mx.symbol.Convolution(pool1, name = "d2", kernel = c(3, 3), stride = c(2, 2), 
+  pad = c(0, 0), num_filter = 32, no_bias = no_bias)
+dbn2 <- mx.symbol.BatchNorm(d2, name = "dbn2", fix_gamma = fix_gamma, eps = eps)
+dact2 <- mx.symbol.LeakyReLU(dbn2, name = "dact2", act_type = "elu", slope = 0.25)
+
+d3 <- mx.symbol.Convolution(dact2, name = "d3", kernel = c(3, 3), stride = c(1, 1), 
+  pad = c(0, 0), num_filter = 64, no_bias = no_bias)
+dbn3 <- mx.symbol.BatchNorm(d3, name = "dbn3", fix_gamma = fix_gamma, eps = eps)
+dact3 <- mx.symbol.LeakyReLU(dbn3, name = "dact3", act_type = "elu", slope = 0.25)
+
+d4 <- mx.symbol.Convolution(dact2, name = "d3", kernel = c(4, 4), stride = c(1, 1), 
+  pad = c(0, 0), num_filter = 64, no_bias = no_bias)
+dbn4 <- mx.symbol.BatchNorm(d4, name = "dbn4", fix_gamma = fix_gamma, eps = eps)
+dact4 <- mx.symbol.LeakyReLU(dbn4, name = "dact4", act_type = "elu", slope = 0.25)
+
+# pool4 <- mx.symbol.Pooling(data=dact3, name='pool4', pool_type='avg',
+# kernel=c(4,4), stride=c(1,1), pad=c(0,0))
+
+dflat <- mx.symbol.Flatten(dact4, name = "dflat")
+
+dfc <- mx.symbol.FullyConnected(data = dflat, name = "dfc", num_hidden = 1, no_bias = F)
+D_sym <- mx.symbol.LogisticRegressionOutput(data = dfc, label = label, name = "D_sym")
+
+
+### Graph
+input_shape_G <- c(1, 1, 10, batch_size)
+input_shape_D <- c(28, 28, 1, batch_size)
+
+graph.viz(G_sym, type = "graph", direction = "LR")
+graph.viz(D_sym, type = "graph", direction = "LR")
+
+
 ### Training module for GAN
-#####################################################
 
 # Change this to mx.gpu() when running on gpu machine.
 devices <- mx.cpu()
@@ -27,79 +134,61 @@ data_shape_D <- c(28, 28, 1, batch_size)
 digit_shape_D <- c(10, batch_size)
 
 mx.metric.binacc <- mx.metric.custom("binacc", function(label, pred) {
-  res <- mean(label==round(pred))
+  res <- mean(label == round(pred))
   return(res)
 })
 
 mx.metric.logloss <- mx.metric.custom("logloss", function(label, pred) {
-  res <- mean(label*log(pred)+(1-label)*log(1-pred))
+  res <- mean(label * log(pred) + (1 - label) * log(1 - pred))
   return(res)
 })
 
-##############################################
 ### Define iterators
 iter_G <- G_iterator(batch_size = batch_size)
 iter_D <- D_iterator(batch_size = batch_size)
 
-exec_G <- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices,
-                        grad.req = "write")
-exec_D <- mx.simple.bind(symbol = D_sym, data=data_shape_D, digit=digit_shape_D,
-                        ctx = devices, grad.req = "write")
+exec_G <- mx.simple.bind(symbol = G_sym, data = data_shape_G, ctx = devices, grad.req = "write")
+exec_D <- mx.simple.bind(symbol = D_sym, data = data_shape_D, digit = digit_shape_D, 
+  ctx = devices, grad.req = "write")
 
 ### initialize parameters - To Do - personalise each layer
 initializer <- mx.init.Xavier(rnd_type = "gaussian", factor_type = "avg", magnitude = 3)
 
-arg_param_ini_G <- mx.init.create(initializer = initializer,
-                                  shape.array = mx.symbol.infer.shape(G_sym,
-                                                                      data=data_shape_G)$arg.shapes,
-                                  ctx = devices)
-aux_param_ini_G <- mx.init.create(initializer = initializer,
-                                  shape.array = mx.symbol.infer.shape(G_sym,
-                                                                      data=data_shape_G)$aux.shapes,
-                                  ctx = devices)
+arg_param_ini_G <- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, 
+  data = data_shape_G)$arg.shapes, ctx = devices)
+aux_param_ini_G <- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(G_sym, 
+  data = data_shape_G)$aux.shapes, ctx = devices)
 
-arg_param_ini_D <- mx.init.create(initializer = initializer,
-                                  shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D,
-                                                                      digit=digit_shape_D)$arg.shapes,
-                                  ctx = devices)
+arg_param_ini_D <- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, 
+  data = data_shape_D, digit = digit_shape_D)$arg.shapes, ctx = devices)
 
-aux_param_ini_D <- mx.init.create(initializer = initializer,
-                                  shape.array = mx.symbol.infer.shape(D_sym, data=data_shape_D,
-                                                                      digit=digit_shape_D)$aux.shapes,
-                                  ctx = devices)
+aux_param_ini_D <- mx.init.create(initializer = initializer, shape.array = mx.symbol.infer.shape(D_sym, 
+  data = data_shape_D, digit = digit_shape_D)$aux.shapes, ctx = devices)
 
-mx.exec.update.arg.arrays(exec_G, arg_param_ini_G, match.name=TRUE)
-mx.exec.update.aux.arrays(exec_G, aux_param_ini_G, match.name=TRUE)
+mx.exec.update.arg.arrays(exec_G, arg_param_ini_G, match.name = TRUE)
+mx.exec.update.aux.arrays(exec_G, aux_param_ini_G, match.name = TRUE)
 
-mx.exec.update.arg.arrays(exec_D, arg_param_ini_D, match.name=TRUE)
-mx.exec.update.aux.arrays(exec_D, aux_param_ini_D, match.name=TRUE)
+mx.exec.update.arg.arrays(exec_D, arg_param_ini_D, match.name = TRUE)
+mx.exec.update.aux.arrays(exec_D, aux_param_ini_D, match.name = TRUE)
 
 input_names_G <- mxnet:::mx.model.check.arguments(G_sym)
 input_names_D <- mxnet:::mx.model.check.arguments(D_sym)
 
 
-###################################################
-#initialize optimizers
-optimizer_G <- mx.opt.create(name = "adadelta",
-                           rho=0.92, 
-                           epsilon = 1e-6, 
-                           wd=0, 
-                           rescale.grad=1/batch_size, 
-                           clip_gradient=1)
+### initialize optimizers
+optimizer_G <- mx.opt.create(name = "adadelta", rho = 0.92, epsilon = 1e-06, wd = 0, 
+  rescale.grad = 1/batch_size, clip_gradient = 1)
 
-updater_G <- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays, ctx = devices)
+updater_G <- mx.opt.get.updater(optimizer = optimizer_G, weights = exec_G$ref.arg.arrays, 
+  ctx = devices)
 
-optimizer_D <- mx.opt.create(name = "adadelta",
-                           rho=0.92, 
-                           epsilon = 1e-6, 
-                           wd=0, 
-                           rescale.grad=1/batch_size, 
-                           clip_gradient=1)
+optimizer_D <- mx.opt.create(name = "adadelta", rho = 0.92, epsilon = 1e-06, wd = 0, 
+  rescale.grad = 1/batch_size, clip_gradient = 1)
 
-updater_D <- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays, ctx = devices)
+updater_D <- mx.opt.get.updater(optimizer = optimizer_D, weights = exec_D$ref.arg.arrays, 
+  ctx = devices)
 
-####################################
-#initialize metric
+### initialize metric
 metric_G <- mx.metric.binacc
 metric_G_value <- metric_G$init()
 
@@ -119,85 +208,81 @@ for (iteration in 1:2400) {
   ### Random input to Generator to produce fake sample
   G_values <- iter_G$value()
   G_data <- G_values[input_names_G]
-  mx.exec.update.arg.arrays(exec_G, arg.arrays = G_data, match.name=TRUE)
-  mx.exec.forward(exec_G, is.train=T)
+  mx.exec.update.arg.arrays(exec_G, arg.arrays = G_data, match.name = TRUE)
+  mx.exec.forward(exec_G, is.train = T)
   
-  ### Feed Discriminator with Concatenated Generator images and real images
-  ### Random input to Generator
+  ### Feed Discriminator with Concatenated Generator images and real images Random
+  ### input to Generator
   D_data_fake <- exec_G$ref.outputs$G_sym_output
-  D_digit_fake <- G_values$data %>% mx.nd.Reshape(shape=c(-1, batch_size))
+  D_digit_fake <- G_values$data %>% mx.nd.Reshape(shape = c(-1, batch_size))
   
   D_values <- iter_D$value()
   D_data_real <- D_values$data
   D_digit_real <- D_values$digit
   
   ### Train loop on fake
-  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_fake, digit=D_digit_fake,
-                                                      label=mx.nd.array(rep(0, batch_size))),
-                            match.name=TRUE)
-  mx.exec.forward(exec_D, is.train=T)
+  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data = D_data_fake, digit = D_digit_fake, 
+    label = mx.nd.array(rep(0, batch_size))), match.name = TRUE)
+  mx.exec.forward(exec_D, is.train = T)
   mx.exec.backward(exec_D)
   update_args_D <- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
-  mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
+  mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null = TRUE)
+  
+  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(0, batch_size))), 
+    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]), metric_D_value)
   
-  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(0, batch_size))),
-                                    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]),
-                                    metric_D_value)
-
   ### Train loop on real
-  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data=D_data_real, digit=D_digit_real,
-                                                      label=mx.nd.array(rep(1, batch_size))),
-                            match.name=TRUE)
-  mx.exec.forward(exec_D, is.train=T)
+  mx.exec.update.arg.arrays(exec_D, arg.arrays = list(data = D_data_real, digit = D_digit_real, 
+    label = mx.nd.array(rep(1, batch_size))), match.name = TRUE)
+  mx.exec.forward(exec_D, is.train = T)
   mx.exec.backward(exec_D)
   update_args_D <- updater_D(weight = exec_D$ref.arg.arrays, grad = exec_D$ref.grad.arrays)
-  mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null=TRUE)
+  mx.exec.update.arg.arrays(exec_D, update_args_D, skip.null = TRUE)
   
-  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(1, batch_size))),
-                                    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]),
-                                    metric_D_value)
+  metric_D_value <- metric_D$update(label = as.array(mx.nd.array(rep(1, batch_size))), 
+    pred = as.array(exec_D$ref.outputs[["D_sym_output"]]), metric_D_value)
   
   ### Update Generator weights - use a seperate executor for writing data gradients
-  exec_D_back <- mxnet:::mx.symbol.bind(symbol = D_sym, arg.arrays = exec_D$arg.arrays,
-                                       aux.arrays = exec_D$aux.arrays,
-                                       grad.reqs = rep("write", length(exec_D$arg.arrays)),
-                                       ctx = devices)
-  mx.exec.update.arg.arrays(exec_D_back, arg.arrays = list(data=D_data_fake, digit=D_digit_fake,
-                                                           label=mx.nd.array(rep(1, batch_size))),
-                            match.name=TRUE)
-  mx.exec.forward(exec_D_back, is.train=T)
+  exec_D_back <- mxnet:::mx.symbol.bind(symbol = D_sym, arg.arrays = exec_D$arg.arrays, 
+    aux.arrays = exec_D$aux.arrays, grad.reqs = rep("write", length(exec_D$arg.arrays)), 
+    ctx = devices)
+  mx.exec.update.arg.arrays(exec_D_back, arg.arrays = list(data = D_data_fake, 
+    digit = D_digit_fake, label = mx.nd.array(rep(1, batch_size))), match.name = TRUE)
+  mx.exec.forward(exec_D_back, is.train = T)
   mx.exec.backward(exec_D_back)
   D_grads <- exec_D_back$ref.grad.arrays$data
-  mx.exec.backward(exec_G, out_grads=D_grads)
+  mx.exec.backward(exec_G, out_grads = D_grads)
   
   update_args_G <- updater_G(weight = exec_G$ref.arg.arrays, grad = exec_G$ref.grad.arrays)
-  mx.exec.update.arg.arrays(exec_G, update_args_G, skip.null=TRUE)
+  mx.exec.update.arg.arrays(exec_G, update_args_G, skip.null = TRUE)
   
-  ### Update metrics
-  #metric_G_value <- metric_G$update(values[[label_name]], exec_G$ref.outputs[[output_name]], metric_G_value)
+  ### Update metrics metric_G_value <- metric_G$update(values[[label_name]],
+  ### exec_G$ref.outputs[[output_name]], metric_G_value)
   
-  if (iteration %% 25==0){
+  if (iteration%%25 == 0) {
     D_metric_result <- metric_D$get(metric_D_value)
-    cat(paste0("[", iteration, "] ", D_metric_result$name, ": ", D_metric_result$value, "\n"))
+    cat(paste0("[", iteration, "] ", D_metric_result$name, ": ", D_metric_result$value, 
+      "\n"))
   }
   
-  if (iteration==1 | iteration %% 100==0){
+  if (iteration == 1 | iteration%%100 == 0) {
     
     metric_D_value <- metric_D$init()
     
-    par(mfrow=c(3,3), mar=c(0.1,0.1,0.1,0.1))
+    par(mfrow = c(3, 3), mar = c(0.1, 0.1, 0.1, 0.1))
     for (i in 1:9) {
-      img <- as.array(exec_G$ref.outputs$G_sym_output)[,,,i]
-      plot(as.cimg(img), axes=F)
+      img <- as.array(exec_G$ref.outputs$G_sym_output)[, , , i]
+      plot(as.cimg(img), axes = F)
     }
-
+    
     print(as.numeric(as.array(G_values$digit)))
     print(as.numeric(as.array(D_values$label)))
     
   }
 }
 
-ifelse(!dir.exists(file.path(".", "models")), dir.create(file.path(".", "models")), "Folder already exists")
+ifelse(!dir.exists(file.path(".", "models")), dir.create(file.path(".", "models")), 
+  "Folder already exists")
 mx.symbol.save(D_sym, filename = "models/D_sym_model_v1.json")
 mx.nd.save(exec_D$arg.arrays, filename = "models/D_aux_params_v1.params")
 mx.nd.save(exec_D$aux.arrays, filename = "models/D_aux_params_v1.params")
@@ -212,19 +297,19 @@ G_sym <- mx.symbol.load("models/G_sym_model_v1.json")
 G_arg_params <- mx.nd.load("models/G_arg_params_v1.params")
 G_aux_params <- mx.nd.load("models/G_aux_params_v1.params")
 
-digit <- mx.nd.array(rep(9, times=batch_size))
+digit <- mx.nd.array(rep(9, times = batch_size))
 data <- mx.nd.one.hot(indices = digit, depth = 10)
-data <- mx.nd.reshape(data = data, shape = c(1,1,-1, batch_size))
+data <- mx.nd.reshape(data = data, shape = c(1, 1, -1, batch_size))
 
-exec_G <- mx.simple.bind(symbol = G_sym, data=data_shape_G, ctx = devices, grad.req = "null")
-mx.exec.update.arg.arrays(exec_G, G_arg_params, match.name=TRUE)
-mx.exec.update.arg.arrays(exec_G, list(data=data), match.name=TRUE)
-mx.exec.update.aux.arrays(exec_G, G_aux_params, match.name=TRUE)
+exec_G <- mx.simple.bind(symbol = G_sym, data = data_shape_G, ctx = devices, grad.req = "null")
+mx.exec.update.arg.arrays(exec_G, G_arg_params, match.name = TRUE)
+mx.exec.update.arg.arrays(exec_G, list(data = data), match.name = TRUE)
+mx.exec.update.aux.arrays(exec_G, G_aux_params, match.name = TRUE)
 
-mx.exec.forward(exec_G, is.train=F)
+mx.exec.forward(exec_G, is.train = F)
 
-par(mfrow=c(3,3), mar=c(0.1,0.1,0.1,0.1))
+par(mfrow = c(3, 3), mar = c(0.1, 0.1, 0.1, 0.1))
 for (i in 1:9) {
-  img <- as.array(exec_G$ref.outputs$G_sym_output)[,,,i]
-  plot(as.cimg(img), axes=F)
+  img <- as.array(exec_G$ref.outputs$G_sym_output)[, , , i]
+  plot(as.cimg(img), axes = F)
 }
diff --git a/example/gan/CGAN_mnist_R/iterators.R b/example/gan/CGAN_mnist_R/iterators.R
index 6069296c24f2..dffe468ad2c7 100644
--- a/example/gan/CGAN_mnist_R/iterators.R
+++ b/example/gan/CGAN_mnist_R/iterators.R
@@ -16,64 +16,66 @@
 # under the License.
 
 
-G_iterator<- function(batch_size){
+G_iterator <- function(batch_size) {
   
-  batch<- 0
-  batch_per_epoch<-5
+  batch <- 0
+  batch_per_epoch <- 5
   
-  reset<- function(){
-    batch<<- 0
+  reset <- function() {
+    batch <<- 0
   }
   
-  iter.next<- function(){
-    batch<<- batch+1
-    if (batch>batch_per_epoch) {
+  iter.next <- function() {
+    batch <<- batch + 1
+    if (batch > batch_per_epoch) {
       return(FALSE)
     } else {
       return(TRUE)
     }
   }
   
-  value<- function(){
-    set.seed(123+batch)
-    digit<- mx.nd.array(sample(0:9, size = batch_size, replace = T))
-    data<- mx.nd.one.hot(indices = digit, depth = 10)
-    data<- mx.nd.reshape(data = data, shape = c(1,1,-1, batch_size))
-    return(list(data=data, digit=digit))
+  value <- function() {
+    set.seed(123 + batch)
+    digit <- mx.nd.array(sample(0:9, size = batch_size, replace = T))
+    data <- mx.nd.one.hot(indices = digit, depth = 10)
+    data <- mx.nd.reshape(data = data, shape = c(1, 1, -1, batch_size))
+    return(list(data = data, digit = digit))
   }
   
-  return(list(reset=reset, iter.next=iter.next, value=value, batch_size=batch_size, batch=batch))
+  return(list(reset = reset, iter.next = iter.next, value = value, batch_size = batch_size, 
+    batch = batch))
 }
 
-D_iterator<- function(batch_size){
+D_iterator <- function(batch_size) {
   
-  batch<- 0
-  batch_per_epoch<-5
+  batch <- 0
+  batch_per_epoch <- 5
   
-  reset<- function(){
-    batch<<- 0
+  reset <- function() {
+    batch <<- 0
   }
   
-  iter.next<- function(){
-    batch<<- batch+1
-    if (batch>batch_per_epoch) {
+  iter.next <- function() {
+    batch <<- batch + 1
+    if (batch > batch_per_epoch) {
       return(FALSE)
     } else {
       return(TRUE)
     }
   }
   
-  value<- function(){
-    set.seed(123+batch)
-    idx<- sample(length(train_label), size = batch_size, replace = T)
-    data<- train_data[,,,idx, drop=F]
-    label<- mx.nd.array(train_label[idx])
-    digit<- mx.nd.one.hot(indices = label, depth = 10)
+  value <- function() {
+    set.seed(123 + batch)
+    idx <- sample(length(train_label), size = batch_size, replace = T)
+    data <- train_data[, , , idx, drop = F]
+    label <- mx.nd.array(train_label[idx])
+    digit <- mx.nd.one.hot(indices = label, depth = 10)
     
-    return(list(data=mx.nd.array(data), digit=digit, label=label))
+    return(list(data = mx.nd.array(data), digit = digit, label = label))
   }
   
-  return(list(reset=reset, iter.next=iter.next, value=value, batch_size=batch_size, batch=batch))
+  return(list(reset = reset, iter.next = iter.next, value = value, batch_size = batch_size, 
+    batch = batch))
 }