SVRG optimization in python/contrib package, this version supports single machine single cpu, single gpu and multi-gpus
apache · Aug 29, 2018 · 495be3b · 495be3b
1 parent 6fdfd89
commit 495be3b
Show file tree

Hide file tree

Showing 17 changed files with 433 additions and 304 deletions.
diff --git a/contrib/svrg_optimization_python/src/__init__.py b/contrib/svrg_optimization_python/src/__init__.py
diff --git a/contrib/svrg_optimization_python/tests/__init__.py b/contrib/svrg_optimization_python/tests/__init__.py
diff --git a/contrib/svrg_optimization_python/tests/test_svrg_module.py b/contrib/svrg_optimization_python/tests/test_svrg_module.py
diff --git a/contrib/svrg_optimization_python/tests/test_svrg_optimizer.py b/contrib/svrg_optimization_python/tests/test_svrg_optimizer.py
diff --git a/...mization_python/benchmarks/benchmark1.png → ...ple/svrg_module/benchmarks/benchmark1.png b/...mization_python/benchmarks/benchmark1.png → ...ple/svrg_module/benchmarks/benchmark1.png
diff --git a/...mization_python/benchmarks/benchmark2.png → ...ple/svrg_module/benchmarks/benchmark2.png b/...mization_python/benchmarks/benchmark2.png → ...ple/svrg_module/benchmarks/benchmark2.png
diff --git a/example/svrg_module/common.py b/example/svrg_module/common.py
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import mxnet as mx
+import logging
+from mxnet.contrib.svrg_optimization.svrg_module import SVRGModule
+
+
+def create_lin_reg_network(train_features, train_labels, feature_dim, batch_size, update_freq, ctx, logger):
+    # fit a linear regression model with mxnet SVRG
+    print("Fitting linear regression with mxnet")
+    train_iter = mx.io.NDArrayIter(train_features, train_labels, batch_size=batch_size, shuffle=True,
+                                   data_name='data', label_name='label')
+    data = mx.sym.Variable("data")
+    label = mx.sym.Variable("label")
+    weight = mx.sym.Variable("fc_weight", shape=(1, feature_dim))
+    net = mx.sym.dot(data, weight.transpose())
+    bias = mx.sym.Variable("fc_bias", shape=(1,), wd_mult=0.0, lr_mult=10.0)
+    net = mx.sym.broadcast_plus(net, bias)
+    net = mx.sym.LinearRegressionOutput(data=net, label=label)
+
+    mod = SVRGModule(symbol=net, context=ctx, data_names=['data'], label_names=['label'], logger=logger,
+                     update_freq=update_freq)
+    return train_iter, mod
+
+
+def create_metrics(metrics):
+    metric = mx.metric.create(metrics)
+    return metric
+
+
+def create_logger():
+    logger = logging.getLogger('sgd_svrg')
+    logger.setLevel(logging.INFO)
+    formatter = logging.Formatter('%(asctime)s - %(message)s')
+    fh = logging.FileHandler('experiments_lr.log')
+    fh.setFormatter(formatter)
+    logger.addHandler(fh)
+    return logger
+
+
+def accumulate_grad(grad_dict, mod):
+    param_names = mod._exec_group.param_names
+    for i in range(len(param_names)):
+        if param_names[i] not in grad_dict:
+            grad_dict[param_names[i]] = mod._exec_group.grad_arrays[i][0].copy()
+        else:
+            grad_dict[param_names[i]] = mx.ndarray.concat(grad_dict[param_names[i]], mod._exec_group.grad_arrays[i][0],
+                                                          dim=0)
+
+
+def calc_expectation(grad_dict, count):
+    for key in grad_dict.keys():
+        grad_dict[str.format(key+"_expectation")] = mx.ndarray.sum(grad_dict[key], axis=0)/count
+
+    return grad_dict
+
+
+def calc_variance(grad_dict, count, param_names):
+    for i in range(len(param_names)):
+        diff_sqr = mx.ndarray.square(mx.nd.subtract(grad_dict[param_names[i]],
+                                                    grad_dict[str.format(param_names[i]+"_expectation")]))
+        grad_dict[str.format(param_names[i] + "_variance")] = mx.ndarray.sum(diff_sqr, axis=0) / count
diff --git a/example/svrg_module/data_reader.py b/example/svrg_module/data_reader.py
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import numpy as np
+
+
+def read_year_prediction_data(fileName):
+    # Download data file
+    # from subprocess import call
+    # call(['wget', 'https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/YearPredictionMSD.bz2'])
+    # call(['bzip2', '-d', 'YearPredictionMSD.bz2'])
+
+    from sklearn.datasets import load_svmlight_file
+
+    feature_dim = 90
+    print("Reading data from disk...")
+    train_features, train_labels = load_svmlight_file(fileName, n_features=feature_dim, dtype=np.float32)
+    train_features = train_features.todense()
+
+    # normalize the data: subtract means and divide by standard deviations
+    label_mean = train_labels.mean()
+    label_std = np.sqrt(np.square(train_labels - label_mean).mean())
+    feature_means = train_features.mean(axis=0)
+    feature_stds = np.sqrt(np.square(train_features - feature_means).mean(axis=0))
+
+    train_features = (train_features - feature_means) / feature_stds
+    train_labels = (train_labels - label_mean) / label_std
+
+    return feature_dim, train_features, train_labels