Jupyter notebook for benchmark demo
StephanieYuan committed Sep 8, 2018
1 parent eb1bef8 commit 4028bdf
Showing 8 changed files with 392 additions and 23 deletions.
8 changes: 4 additions & 4 deletions example/svrg_module/README.md
@@ -21,12 +21,12 @@ YearPredictionMSD: contains predictions of the release year of a song from audio
 400,000 samples with 90 features. Please uncomment data downloading script from data_reader.py to download the data.

 #### Benchmarks:
-An initial set of benchmarks has been performed on YearPredictionDatasetMSD with linear regression model.
+An initial set of benchmarks has been performed on YearPredictionDatasetMSD with linear regression model. A jupyter
+notebook under `/benchmarks` demonstrates the training process and plots two graphs for benchmarking.

-* benchmark1.py: A lr_scheduler returns a new learning rate based on the number of updates that have been performed.
-The training loss of SVRG is less than SGD with lr_scheduler over all of the 100 epochs.
+* benchmark1: A lr_scheduler returns a new learning rate based on the number of updates that have been performed.

-* benchmark2.py: One drawback for SGD is that in order to converge faster, the learning rate has to decay to zero,
+* benchmark2: One drawback for SGD is that in order to converge faster, the learning rate has to decay to zero,
 thus SGD needs to start with a small learning rate. The learning rate does not need to decay to zero for SVRG,
 therefore we can use a relatively larger learning rate. SGD with learning rate of (0.001, 0.0025) and SVRG with
 learning rate of (0.025) are benchmarked. Even though SVRG starts with a relatively large learning rate, it converges
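A minimal sketch of the kind of run these benchmarks describe, not the notebook's actual code: the toy data, scheduler step/factor values, and the choice of `update_freq` are illustrative stand-ins, while `SVRGModule`, `FactorScheduler`, and `NDArrayIter` are the real MXNet APIs this commit relies on.

```python
import mxnet as mx
import numpy as np
from mxnet.contrib.svrg_optimization.svrg_module import SVRGModule

# Toy stand-in for YearPredictionMSD (90 features, as in the README).
X = np.random.randn(1000, 90).astype('float32')
y = np.random.randn(1000).astype('float32')
train_iter = mx.io.NDArrayIter(X, y, batch_size=100, label_name='label')

# Minimal linear-regression symbol, analogous to common.py's network.
data = mx.sym.Variable('data')
label = mx.sym.Variable('label')
net = mx.sym.FullyConnected(data=data, num_hidden=1, name='fc')
net = mx.sym.LinearRegressionOutput(data=net, label=label, name='lro')

# benchmark1-style schedule: the learning rate decays as the number of
# updates grows (step/factor values here are illustrative only).
lr_sched = mx.lr_scheduler.FactorScheduler(step=1000, factor=0.9)

mod = SVRGModule(symbol=net, data_names=['data'], label_names=['label'],
                 update_freq=2)  # how often full gradients are recomputed
mod.fit(train_iter, eval_metric='mse', num_epoch=100, optimizer='sgd',
        optimizer_params={'learning_rate': 0.025,  # benchmark2's SVRG rate
                          'lr_scheduler': lr_sched})
```

Swapping `SVRGModule` for a plain `mx.mod.Module` with the SGD rates quoted above (0.001, 0.0025) gives the comparison the two graphs plot.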
Binary file removed example/svrg_module/benchmarks/benchmark1.png
Binary file removed example/svrg_module/benchmarks/benchmark2.png
379 changes: 379 additions & 0 deletions example/svrg_module/benchmarks/svrg_benchmark.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion example/svrg_module/linear_regression/common.py
@@ -33,7 +33,6 @@ def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size
     bias = mx.sym.Variable("fc_bias", shape=(1,), wd_mult=0.0, lr_mult=10.0)
     net = mx.sym.broadcast_plus(net, bias)
     net = mx.sym.LinearRegressionOutput(data=net, label=label)
-
     mod = SVRGModule(symbol=net, context=ctx, data_names=['data'], label_names=['label'], logger=logger,
                      update_freq=update_freq)
     return train_iter, mod
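For readers of this hunk: `update_freq` is the SVRG-specific knob that `SVRGModule` adds on top of the ordinary `Module` API. If the contrib docstrings are taken at face value, it sets how often, measured in epochs, the module recomputes the full-dataset gradient at a snapshot of the weights; `common.py` simply forwards the value from its caller.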
16 changes: 8 additions & 8 deletions example/svrg_module/linear_regression/data_reader.py
@@ -17,17 +17,16 @@


 import numpy as np
+from sklearn.datasets import load_svmlight_file

+# Download data file
+# from subprocess import call
+# YearPredictionMSD dataset: https://archive.ics.uci.edu/ml/datasets/yearpredictionmsd
+# call(['wget', 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2'])
+# call(['bzip2', '-d', 'YearPredictionMSD.bz2'])

-def read_year_prediction_data(fileName):
-    # Download data file
-    # from subprocess import call
-    # call(['wget', 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2'])
-    # call(['bzip2', '-d', 'YearPredictionMSD.bz2'])
-
-    from sklearn.datasets import load_svmlight_file
-
-    # YearPredictionMSD dataset: https://archive.ics.uci.edu/ml/datasets/yearpredictionmsd
+def read_year_prediction_data(fileName):
     feature_dim = 90
     print("Reading data from disk...")
     train_features, train_labels = load_svmlight_file(fileName, n_features=feature_dim, dtype=np.float32)
@@ -43,3 +42,4 @@ def read_year_prediction_data(fileName):
     train_labels = (train_labels - label_mean) / label_std

     return feature_dim, train_features, train_labels
+
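For reference, a short usage sketch of the relocated helper, assuming the script runs next to `data_reader.py` and the uncompressed LIBSVM file `YearPredictionMSD` sits in the working directory:

```python
from data_reader import read_year_prediction_data

# Returns the fixed feature dimension (90), the features loaded via
# sklearn's load_svmlight_file, and mean/std-standardized labels.
feature_dim, train_features, train_labels = read_year_prediction_data('YearPredictionMSD')
print(feature_dim, train_features.shape, train_labels.shape)
```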
9 changes: 0 additions & 9 deletions python/mxnet/contrib/svrg_optimization/svrg_module.py
@@ -222,7 +222,6 @@ def bind(self, data_shapes, label_shapes=None, for_training=True,
         """
         # force rebinding is typically used when one want to switch from
         # training to prediction phase.
-
         super(SVRGModule, self).bind(data_shapes, label_shapes, for_training, inputs_need_grad, force_rebind,
                                      shared_module, grad_req)

@@ -248,7 +247,6 @@ def forward(self, data_batch, is_train=None):
         is_train : bool
             Default is ``None``, which means ``is_train`` takes the value of ``self.for_training``.
         """
-
         super(SVRGModule, self).forward(data_batch, is_train)

         if is_train:
@@ -268,7 +266,6 @@ def backward(self, out_grads=None):
             This parameter is only needed when bind is called
             on outputs that are not a loss function.
         """
-
         super(SVRGModule, self).backward(out_grads)

         if self._mod_aux.binded:
@@ -289,7 +286,6 @@ def update(self):
         ----------
         :meth:`BaseModule.update`.
         """
-
         self._update_svrg_gradients()
         super(SVRGModule, self).update()

@@ -301,7 +297,6 @@ def update_full_grads(self, train_data):
         ----------
         train_data: DataIter
             Train data iterator
         """
-
         param_names = self._exec_group.param_names
         arg, aux = self.get_params()
@@ -341,7 +336,6 @@ def _accumulate_kvstore(self, key, value):
         value: NDArray, RowSparseNDArray
             Average of the full gradients.
         """
-
         # Accumulate full gradients for current epochs
         self._kvstore.push(key + "_full", value)
         self._kvstore._barrier()
@@ -360,7 +354,6 @@ def _allocate_gradients(self, key, value):
         value: List of NDArray, List of RowSparseNDArray
             A list of average of the full gradients in the KVStore.
         """
-
         for i in range(self._ctx_len):
             self._param_dict[i][key] = value[i] / self._ctx_len

@@ -381,7 +374,6 @@ def _svrg_grads_update_rule(self, g_curr_batch_curr_weight, g_curr_batch_special_weight,
         Gradients calculated using SVRG update rule:
         grads = g_curr_batch_curr_weight - g_curr_batch_special_weight + g_special_weight_all_batch
         """
-
         for index, grad in enumerate(g_curr_batch_curr_weight):
             grad -= g_curr_batch_special_weight[index]
             grad += g_special_weight_all_batch[index]
@@ -473,7 +465,6 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
         validation_metric: str or EvalMetric
             The performance measure used to display during validation.
         """
-
         assert num_epoch is not None, 'please specify number of epochs'

         self.bind(data_shapes=train_data.provide_data, label_shapes=train_data.provide_label,
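The `_svrg_grads_update_rule` hunk above is the core of the method: the batch gradient at the current weights is corrected by subtracting the same batch's gradient at the snapshot ("special") weights and adding the full-dataset gradient at that snapshot. A self-contained NumPy sketch of the rule stated in the docstring (the array values are toy inputs, not the module's NDArray machinery):

```python
import numpy as np

def svrg_update_rule(g_curr_batch_curr_weight,
                     g_curr_batch_special_weight,
                     g_special_weight_all_batch):
    """Variance-reduced SVRG gradient for one parameter.

    g_curr_batch_curr_weight    -- batch gradient at the current weights
    g_curr_batch_special_weight -- same batch's gradient at the snapshot weights
    g_special_weight_all_batch  -- full-dataset gradient at the snapshot weights
    """
    return (g_curr_batch_curr_weight
            - g_curr_batch_special_weight
            + g_special_weight_all_batch)

# Sanity check: when current and snapshot weights coincide, the batch
# terms cancel and the rule returns the full gradient exactly.
g = np.array([0.5, -1.0])
assert np.allclose(svrg_update_rule(g, g, np.array([0.2, 0.1])), [0.2, 0.1])
```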
2 changes: 1 addition & 1 deletion tests/python/unittest/test_contrib_svrg_module.py
@@ -140,7 +140,7 @@ def test_svrgmodule_reshape():
     sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

     dshape=(3, 4)
-    mod = SVRGModule(sym, data_names=["data"], label_names=None, context=[mx.cpu(0), mx.cpu(1)], update_freq=1)
+    mod = SVRGModule(sym, data_names=["data"], label_names=None, context=[mx.cpu(0), mx.cpu(1)], update_freq=2)
     mod.bind(data_shapes=[('data', dshape)])
     mod.init_params()
     mod._mod_aux.init_params()
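The touched test covers `SVRGModule.reshape`; the only change here is running it with `update_freq=2` instead of 1. A condensed sketch of the flow the test exercises, under the assumption that reshape and forward behave as in the ordinary Module API (the shapes mirror the hunk, the forward check is illustrative):

```python
import mxnet as mx
from mxnet.contrib.svrg_optimization.svrg_module import SVRGModule

data = mx.sym.Variable('data')
sym = mx.sym.FullyConnected(data=data, num_hidden=4, name='fc')

mod = SVRGModule(sym, data_names=['data'], label_names=None, update_freq=2)
mod.bind(data_shapes=[('data', (3, 4))])
mod.init_params()

# Re-bind to a new batch size without losing the initialized parameters.
dshape = (14, 4)
mod.reshape(data_shapes=[('data', dshape)])
mod.forward(mx.io.DataBatch(data=[mx.nd.ones(dshape)]), is_train=False)
assert mod.get_outputs()[0].shape == dshape
```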
