This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1291] solve pylint errors in examples with issue no.12205 #13815

Merged
merged 6 commits into from
Feb 11, 2019
24 changes: 15 additions & 9 deletions example/bayesian-methods/algos.py
@@ -14,13 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and DistilledSGLD"""
from __future__ import print_function
import time
import numpy
import mxnet as mx
import mxnet.ndarray as nd
import time
import logging
from utils import *
from utils import copy_param, get_executor, sample_test_regression, sample_test_acc


def calc_potential(exe, params, label_name, noise_precision, prior_precision):
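The import hunk above replaces the `from utils import *` wildcard with the four helpers the module actually uses, which is what pylint's wildcard-import and unused-wildcard-import checks ask for. A generic illustration of the same pattern, using the standard library rather than the example's own `utils` module:

```python
# Wildcard form that pylint flags (wildcard-import, unused-wildcard-import):
#   from os.path import *
#
# Explicit form: every imported name is visible and checkable.
from os.path import join, isfile

path = join("/tmp", "mnist.npz")
print(isfile(path))
```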
@@ -35,6 +35,7 @@ def calc_potential(exe, params, label_name, noise_precision, prior_precision):


def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
"""Calculate gradient"""
exe.copy_params_from(params)
exe.arg_dict['data'][:] = X
if outgrad_f is None:
@@ -48,8 +49,8 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
v.wait_to_read()


def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
eps=1E-6):
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
"""Generate the implementation of step HMC"""
init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
@@ -102,6 +103,7 @@ def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_preci
def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
learning_rate=1E-6, L=10, dev=mx.gpu()):
"""Generate the implementation of HMC"""
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs, initializer)
exe.arg_dict['data'][:] = X
@@ -134,6 +136,7 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test, total_iter_num,
out_grad_f=None,
initializer=None,
minibatch_size=100, dev=mx.gpu()):
"""Generate the implementation of SGD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -173,6 +176,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
initializer=None,
minibatch_size=100, thin_interval=100, burn_in_iter_num=1000, task='classification',
dev=mx.gpu()):
"""Generate the implementation of SGLD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -200,7 +204,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
if i < burn_in_iter_num:
continue
else:
if 0 == (i - burn_in_iter_num) % thin_interval:
if (i - burn_in_iter_num) % thin_interval == 0:
if optimizer.lr_scheduler is not None:
lr = optimizer.lr_scheduler(optimizer.num_update)
else:
@@ -238,6 +242,7 @@ def DistilledSGLD(teacher_sym, student_sym,
minibatch_size=100,
task='classification',
dev=mx.gpu()):
"""Generate the implementation of DistilledSGLD"""
teacher_exe, teacher_params, teacher_params_grad, _ = \
get_executor(teacher_sym, dev, teacher_data_inputs, teacher_initializer)
student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +328,14 @@ def DistilledSGLD(teacher_sym, student_sym,
sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
minibatch_size=minibatch_size)
print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" % (test_correct, test_total,
test_acc, train_correct, train_total, train_acc))
test_acc, train_correct,
train_total, train_acc))
print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
% (teacher_test_correct, teacher_test_total, teacher_test_acc,
teacher_train_correct, teacher_train_total, teacher_train_acc))
else:
print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" % (end - start), "MSE:",
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
minibatch_size=minibatch_size,
save_path='regression_DSGLD.txt'))
start = time.time()
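One of the SGLD hunks above also reorders the sample-thinning comparison so the expression, not the constant, sits on the left (`(i - burn_in_iter_num) % thin_interval == 0`), the form pylint's misplaced-comparison-constant check prefers. A standalone sketch of that burn-in/thinning pattern with toy values (not the example's real settings):

```python
# Keep every `thin_interval`-th iterate after an initial burn-in period.
burn_in_iter_num = 1000   # discard the first 1000 iterations
thin_interval = 100       # then keep every 100th sample
total_iter_num = 5000     # toy value for illustration

kept = []
for i in range(total_iter_num):
    if i < burn_in_iter_num:
        continue
    # variable on the left, constant on the right of the comparison
    if (i - burn_in_iter_num) % thin_interval == 0:
        kept.append(i)

print(len(kept))  # 40 retained samples
```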
101 changes: 57 additions & 44 deletions example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,21 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge (BDK)"""
from __future__ import print_function
import mxnet as mx
import mxnet.ndarray as nd
import argparse
import time
import numpy
import logging
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import argparse
from algos import *
from data_loader import *
from utils import *
import mxnet as mx
import mxnet.ndarray as nd
from algos import HMC, SGD, SGLD, DistilledSGLD
from data_loader import load_mnist, load_toy, load_synthetic
from utils import BiasXavier, SGLDScheduler


class CrossEntropySoftmax(mx.operator.NumpyOp):
"""Calculate CrossEntropy softmax function"""
def __init__(self):
super(CrossEntropySoftmax, self).__init__(False)

@@ -58,6 +58,7 @@ def backward(self, out_grad, in_data, out_data, in_grad):


class LogSoftmax(mx.operator.NumpyOp):
"""Generate helper functions to evaluate softmax loss function"""
def __init__(self):
super(LogSoftmax, self).__init__(False)

@@ -103,6 +104,7 @@ def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precisi


def get_mnist_sym(output_op=None, num_hidden=400):
"""Get symbol of mnist"""
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='mnist_fc1', num_hidden=num_hidden)
net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +119,7 @@ def get_mnist_sym(output_op=None, num_hidden=400):


def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
"""Get synthetic gradient value"""
if grad is None:
grad = nd.empty(theta.shape, theta.context)
theta1 = theta.asnumpy()[0]
@@ -128,17 +131,16 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None
-(X - theta1 - theta2) ** 2 / (2 * vx))
grad_npy = numpy.zeros(theta.shape)
grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta2 / v2
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta2 / v2
grad[:] = grad_npy
return grad
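For readers checking the reflowed expressions above: restated from the surrounding code (this derivation is not part of the diff), `synthetic_grad` computes the gradient of the negative log-posterior of the usual two-component Gaussian-mixture toy model for SGLD, with equal-weight components centered at theta1 and theta1 + theta2, observation variance sigma_x^2, and Gaussian priors on theta1 and theta2. In LaTeX form:

```latex
% Model restated from the code: x_i ~ 1/2 N(theta_1, sigma_x^2) + 1/2 N(theta_1 + theta_2, sigma_x^2),
% theta_1 ~ N(0, sigma_1^2), theta_2 ~ N(0, sigma_2^2).
% grad_npy[0] is this quantity, with the sum rescaled by rescale_grad for the minibatch:
\frac{\partial U}{\partial \theta_1} =
  -\sum_i
  \frac{ e^{-\frac{(x_i-\theta_1)^2}{2\sigma_x^2}} \frac{x_i-\theta_1}{\sigma_x^2}
       + e^{-\frac{(x_i-\theta_1-\theta_2)^2}{2\sigma_x^2}} \frac{x_i-\theta_1-\theta_2}{\sigma_x^2} }
       { e^{-\frac{(x_i-\theta_1)^2}{2\sigma_x^2}}
       + e^{-\frac{(x_i-\theta_1-\theta_2)^2}{2\sigma_x^2}} }
  + \frac{\theta_1}{\sigma_1^2}
% grad_npy[1] keeps only the second component's term in the numerator, plus theta_2 / sigma_2^2.
```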


def get_toy_sym(teacher=True, teacher_noise_precision=None):
"""Get toy symbol"""
if teacher:
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='teacher_fc1', num_hidden=100)
@@ -160,8 +162,9 @@ def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()


def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)

def run_mnist_SGD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +178,8 @@ def run_mnist_SGD(training_num=50000, gpu_id=None):
lr=5E-6, prior_precision=1.0, minibatch_size=100)


def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_SGLD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +194,11 @@ def run_mnist_SGLD(training_num=50000, gpu_id=None):
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
"""Run DistilledSGLD on mnist dataset"""
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
if training_num >= 10000:
if num_training >= 10000:
num_hidden = 800
total_iter_num = 1000000
teacher_learning_rate = 1E-6
@@ -235,6 +239,7 @@ def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):


def run_toy_SGLD(gpu_id=None):
"""Run SGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +248,26 @@ def run_toy_SGLD(gpu_id=None):
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev(gpu_id))


def run_toy_DistilledSGLD(gpu_id=None):
exe, params, _ = SGLD(sym=net,
data_inputs=data_inputs,
X=X,
Y=Y,
X_test=X_test,
Y_test=Y_test,
total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size,
dev=dev(gpu_id)) # disable=unbalanced-tuple-unpacking


def run_toy_DistilledSGLD(gpu_id):
"""Run DistilledSGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
@@ -288,6 +299,7 @@ def run_toy_DistilledSGLD(gpu_id=None):


def run_toy_HMC(gpu_id=None):
"""Run HMC on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
@@ -302,6 +314,7 @@ def run_toy_HMC(gpu_id=None):


def run_synthetic_SGLD():
"""Run synthetic SGLD"""
theta1 = 0
theta2 = 1
sigma1 = numpy.sqrt(10)
@@ -322,14 +335,14 @@ def run_synthetic_SGLD():
grad = nd.empty((2,), mx.cpu())
samples = numpy.zeros((2, total_iter_num))
start = time.time()
for i in xrange(total_iter_num):
for i in range(total_iter_num):
if (i + 1) % 100000 == 0:
end = time.time()
print("Iter:%d, Time spent: %f" % (i + 1, end - start))
start = time.time()
ind = numpy.random.randint(0, X.shape[0])
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
X.shape[0] / float(minibatch_size), grad=grad)
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
updater('theta', grad, theta)
samples[:, i] = theta.asnumpy()
plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
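The loop above swaps Python 2's `xrange` for `range`, which removes the undefined-variable error pylint reports when checking the script under Python 3. A small self-contained illustration of the same pattern (toy iteration count, not the script's real value):

```python
# Python 3 has no xrange; range works on both 2 and 3 and is lazy on 3,
# so the old memory argument for xrange no longer applies.
total_iter_num = 300  # toy value for illustration only
for i in range(total_iter_num):
    if (i + 1) % 100 == 0:  # periodic progress report, as in the script
        print("Iter:%d" % (i + 1))
```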
@@ -354,18 +367,18 @@ def run_synthetic_SGLD():
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
if args.algorithm == 0:
run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
elif args.algorithm == 1:
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
if args.algorithm == 1:
run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
elif args.algorithm == 2:
run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
elif args.algorithm == 3:
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
5 changes: 3 additions & 2 deletions example/bayesian-methods/data_loader.py
@@ -14,14 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create helper functions to load mnist dataset and toy dataset"""
from __future__ import print_function
import numpy
import os
import ssl
import numpy


def load_mnist(training_num=50000):
"""Load mnist dataset"""
data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
if not os.path.isfile(data_path):
from six.moves import urllib
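The data_loader.py hunk adds a module docstring and moves `import numpy` below the standard-library imports, addressing pylint's missing-docstring and wrong-import-order checks. A minimal module skeleton following the same convention (illustrative only, not repository code):

```python
"""Toy module laid out the way pylint expects: docstring, stdlib imports, then third-party."""
from __future__ import print_function
import os    # standard library first
import ssl
import numpy  # third-party packages afterwards

print(os.name, ssl.OPENSSL_VERSION, numpy.__version__)
```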