Jupyter notebook for benchmark demo
StephanieYuan committed Sep 8, 2018
1 parent eb1bef8 commit 4028bdf
Showing 8 changed files with 392 additions and 23 deletions.
8 changes: 4 additions & 4 deletions example/svrg_module/README.md
@@ -21,12 +21,12 @@ YearPredictionMSD: contains predictions of the release year of a song from audio
 400,000 samples with 90 features. Please uncomment data downloading script from data_reader.py to download the data.

 #### Benchmarks:
-An initial set of benchmarks has been performed on YearPredictionDatasetMSD with linear regression model.
+An initial set of benchmarks has been performed on YearPredictionDatasetMSD with linear regression model. A jupyter
+notebook under `/benchmarks` demonstrates the training process and plots two graphs for benchmarking.

-* benchmark1.py: A lr_scheduler returns a new learning rate based on the number of updates that have been performed.
-The training loss of SVRG is less than SGD with lr_scheduler over all of the 100 epochs.
+* benchmark1: A lr_scheduler returns a new learning rate based on the number of updates that have been performed.

-* benchmark2.py: One drawback for SGD is that in order to converge faster, the learning rate has to decay to zero,
+* benchmark2: One drawback for SGD is that in order to converge faster, the learning rate has to decay to zero,
 thus SGD needs to start with a small learning rate. The learning rate does not need to decay to zero for SVRG,
 therefore we can use a relatively larger learning rate. SGD with learning rate of (0.001, 0.0025) and SVRG with
 learning rate of (0.025) are benchmarked. Even though SVRG starts with a relatively large learning rate, it converges
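A minimal sketch of the kind of run these benchmarks describe, not the notebook's actual code: the toy data, scheduler step/factor values, and the choice of `update_freq` are illustrative stand-ins, while `SVRGModule`, `FactorScheduler`, and `NDArrayIter` are the real MXNet APIs this commit relies on.

```python
import mxnet as mx
import numpy as np
from mxnet.contrib.svrg_optimization.svrg_module import SVRGModule

# Toy stand-in for YearPredictionMSD (90 features, as in the README).
X = np.random.randn(1000, 90).astype('float32')
y = np.random.randn(1000).astype('float32')
train_iter = mx.io.NDArrayIter(X, y, batch_size=100, label_name='label')

# Minimal linear-regression symbol, analogous to common.py's network.
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
net = mx.sym.FullyConnected(data=data, num_hidden=1, name='fc')
net = mx.sym.LinearRegressionOutput(data=net, label=label, name='lro')

# benchmark1-style schedule: the learning rate decays as the number of
# updates grows (step/factor values here are illustrative only).
lr_sched = mx.lr_scheduler.FactorScheduler(step=1000, factor=0.9)

mod = SVRGModule(symbol=net, data_names=['data'], label_names=['label'],
                 update_freq=2)  # how often full gradients are recomputed
mod.fit(train_iter, eval_metric='mse', num_epoch=100, optimizer='sgd',
        optimizer_params={'learning_rate': 0.025,  # benchmark2's SVRG rate
                          'lr_scheduler': lr_sched})
```

Swapping `SVRGModule` for a plain `mx.mod.Module` with the SGD rates quoted above (0.001, 0.0025) gives the comparison the two graphs plot.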
Binary file removed example/svrg_module/benchmarks/benchmark1.png
Binary file removed example/svrg_module/benchmarks/benchmark2.png
379 changes: 379 additions & 0 deletions example/svrg_module/benchmarks/svrg_benchmark.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion example/svrg_module/linear_regression/common.py
@@ -33,7 +33,6 @@ def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size
     bias = mx.sym.Variable("fc_bias", shape=(1,), wd_mult=0.0, lr_mult=10.0)
     net = mx.sym.broadcast_plus(net, bias)
     net = mx.sym.LinearRegressionOutput(data=net, label=label)
-
     mod = SVRGModule(symbol=net, context=ctx, data_names=['data'], label_names=['label'], logger=logger,
                      update_freq=update_freq)
     return train_iter, mod
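For readers of this hunk: `update_freq` is the SVRG-specific knob that `SVRGModule` adds on top of the ordinary `Module` API. If the contrib docstrings are taken at face value, it sets how often, measured in epochs, the module recomputes the full-dataset gradient at a snapshot of the weights; `common.py` simply forwards the value from its caller.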
16 changes: 8 additions & 8 deletions example/svrg_module/linear_regression/data_reader.py
@@ -17,17 +17,16 @@


 import numpy as np
+from sklearn.datasets import load_svmlight_file

+# Download data file
+# from subprocess import call
+# YearPredictionMSD dataset: https://archive.ics.uci.edu/ml/datasets/yearpredictionmsd
+# call(['wget', 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2'])
+# call(['bzip2', '-d', 'YearPredictionMSD.bz2'])

-def read_year_prediction_data(fileName):
-    # Download data file
-    # from subprocess import call
-    # call(['wget', 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2'])
-    # call(['bzip2', '-d', 'YearPredictionMSD.bz2'])
-
-    from sklearn.datasets import load_svmlight_file
-
-    # YearPredictionMSD dataset: https://archive.ics.uci.edu/ml/datasets/yearpredictionmsd
+def read_year_prediction_data(fileName):
     feature_dim = 90
     print("Reading data from disk...")
     train_features, train_labels = load_svmlight_file(fileName, n_features=feature_dim, dtype=np.float32)
@@ -43,3 +42,4 @@ def read_year_prediction_data(fileName):
     train_labels = (train_labels - label_mean) / label_std

     return feature_dim, train_features, train_labels
+
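For reference, a short usage sketch of the relocated helper, assuming the script runs next to `data_reader.py` and the uncompressed LIBSVM file `YearPredictionMSD` sits in the working directory:

```python
from data_reader import read_year_prediction_data

# Returns the fixed feature dimension (90), the features loaded via
# sklearn's load_svmlight_file, and mean/std-standardized labels.
feature_dim, train_features, train_labels = read_year_prediction_data('YearPredictionMSD')
print(feature_dim, train_features.shape, train_labels.shape)
```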
9 changes: 0 additions & 9 deletions python/mxnet/contrib/svrg_optimization/svrg_module.py
@@ -222,7 +222,6 @@ def bind(self, data_shapes, label_shapes=None, for_training=True,
         """
         # force rebinding is typically used when one want to switch from
         # training to prediction phase.
-
         super(SVRGModule, self).bind(data_shapes, label_shapes, for_training, inputs_need_grad, force_rebind,
                                      shared_module, grad_req)

@@ -248,7 +247,6 @@ def forward(self, data_batch, is_train=None):
         is_train : bool
             Default is ``None``, which means ``is_train`` takes the value of ``self.for_training``.
         """
-
         super(SVRGModule, self).forward(data_batch, is_train)

         if is_train:
@@ -268,7 +266,6 @@ def backward(self, out_grads=None):
             This parameter is only needed when bind is called
             on outputs that are not a loss function.
         """
-
         super(SVRGModule, self).backward(out_grads)

         if self._mod_aux.binded:
@@ -289,7 +286,6 @@ def update(self):
         ----------
         :meth:`BaseModule.update`.
         """
-
         self._update_svrg_gradients()
         super(SVRGModule, self).update()

@@ -301,7 +297,6 @@ def update_full_grads(self, train_data):
         ----------
         train_data: DataIter
             Train data iterator
         """
-
         param_names = self._exec_group.param_names
         arg, aux = self.get_params()
@@ -341,7 +336,6 @@ def _accumulate_kvstore(self, key, value):
         value: NDArray, RowSparseNDArray
             Average of the full gradients.
         """
-
         # Accumulate full gradients for current epochs
         self._kvstore.push(key + "_full", value)
         self._kvstore._barrier()
@@ -360,7 +354,6 @@ def _allocate_gradients(self, key, value):
         value: List of NDArray, List of RowSparseNDArray
             A list of average of the full gradients in the KVStore.
         """
-
         for i in range(self._ctx_len):
             self._param_dict[i][key] = value[i] / self._ctx_len

@@ -381,7 +374,6 @@ def _svrg_grads_update_rule(self, g_curr_batch_curr_weight, g_curr_batch_special_weight,
         Gradients calculated using SVRG update rule:
         grads = g_curr_batch_curr_weight - g_curr_batch_special_weight + g_special_weight_all_batch
         """
-
         for index, grad in enumerate(g_curr_batch_curr_weight):
             grad -= g_curr_batch_special_weight[index]
             grad += g_special_weight_all_batch[index]
@@ -473,7 +465,6 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
         validation_metric: str or EvalMetric
             The performance measure used to display during validation.
         """
-
         assert num_epoch is not None, 'please specify number of epochs'

         self.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label,
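The `_svrg_grads_update_rule` hunk above is the core of the method: the batch gradient at the current weights is corrected by subtracting the same batch's gradient at the snapshot ("special") weights and adding the full-dataset gradient at that snapshot. A self-contained NumPy sketch of the rule stated in the docstring (the array values are toy inputs, not the module's NDArray machinery):

```python
import numpy as np

def svrg_update_rule(g_curr_batch_curr_weight,
                     g_curr_batch_special_weight,
                     g_special_weight_all_batch):
    """Variance-reduced SVRG gradient for one parameter.

    g_curr_batch_curr_weight    -- batch gradient at the current weights
    g_curr_batch_special_weight -- same batch's gradient at the snapshot weights
    g_special_weight_all_batch  -- full-dataset gradient at the snapshot weights
    """
    return (g_curr_batch_curr_weight
            - g_curr_batch_special_weight
            + g_special_weight_all_batch)

# Sanity check: when current and snapshot weights coincide, the batch
# terms cancel and the rule returns the full gradient exactly.
g = np.array([0.5, -1.0])
assert np.allclose(svrg_update_rule(g, g, np.array([0.2, 0.1])), [0.2, 0.1])
```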
2 changes: 1 addition & 1 deletion tests/python/unittest/test_contrib_svrg_module.py
@@ -140,7 +140,7 @@ def test_svrgmodule_reshape():
     sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

     dshape=(3, 4)
-    mod = SVRGModule(sym, data_names=["data"], label_names=None, context=[mx.cpu(0), mx.cpu(1)], update_freq=1)
+    mod = SVRGModule(sym, data_names=["data"], label_names=None, context=[mx.cpu(0), mx.cpu(1)], update_freq=2)
     mod.bind(data_shapes=[('data', dshape)])
     mod.init_params()
     mod._mod_aux.init_params()
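The touched test covers `SVRGModule.reshape`; the only change here is running it with `update_freq=2` instead of 1. A condensed sketch of the flow the test exercises, under the assumption that reshape and forward behave as in the ordinary Module API (the shapes mirror the hunk, the forward check is illustrative):

```python
import mxnet as mx
from mxnet.contrib.svrg_optimization.svrg_module import SVRGModule

data = mx.sym.Variable('data')
sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

mod = SVRGModule(sym, data_names=['data'], label_names=None, update_freq=2)
mod.bind(data_shapes=[('data', (3, 4))])
mod.init_params()

# Re-bind to a new batch size without losing the initialized parameters.
dshape = (14, 4)
mod.reshape(data_shapes=[('data', dshape)])
mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)]), is_train=False)
assert mod.get_outputs()[0].shape == dshape
```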
