Merge remote-tracking branch 'offical/master' into int8_dataloader
ZhennanQin committed Feb 13, 2019
2 parents cb91ee4 + ce031da commit 2305e22
Showing 40 changed files with 788 additions and 482 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -294,8 +294,8 @@ else()
add_definitions(-DMXNET_USE_NCCL=0)
endif()

include(cmake/ChooseBlas.cmake)
if(USE_CUDA AND FIRST_CUDA)
include(cmake/ChooseBlas.cmake)
include(3rdparty/mshadow/cmake/Utils.cmake)
include(cmake/FirstClassLangCuda.cmake)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
1 change: 1 addition & 0 deletions ci/docker/install/ubuntu_clojure.sh
@@ -27,3 +27,4 @@ echo 'Installing Clojure...'
wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein
chmod 775 lein
sudo cp lein /usr/local/bin
echo "Y" | sudo lein downgrade 2.8.3
1 change: 1 addition & 0 deletions ci/docker/install/ubuntu_core.sh
@@ -35,6 +35,7 @@ apt-get install -y \
libatlas-base-dev \
libcurl4-openssl-dev \
libjemalloc-dev \
libhdf5-dev \
liblapack-dev \
libopenblas-dev \
libopencv-dev \
1 change: 1 addition & 0 deletions ci/docker/runtime_functions.sh
@@ -953,6 +953,7 @@ unittest_ubuntu_cpu_julia() {
export PATH="$1/bin:$PATH"
export MXNET_HOME='/work/mxnet'
export JULIA_DEPOT_PATH='/work/julia-depot'
export INTEGRATION_TEST=1

julia -e 'using InteractiveUtils; versioninfo()'

4 changes: 2 additions & 2 deletions ci/jenkins/Jenkins_steps.groovy
@@ -1028,7 +1028,7 @@ def test_unix_r_gpu() {
def test_unix_julia07_cpu() {
return ['Julia 0.7: CPU': {
node(NODE_LINUX_CPU) {
ws('workspace/ut-julia07-cpu') {
ws('workspace/ut-it-julia07-cpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.unpack_and_init('cpu', mx_lib)
utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_julia07', false)
@@ -1041,7 +1041,7 @@ def test_unix_julia07_cpu() {
def test_unix_julia10_cpu() {
return ['Julia 1.0: CPU': {
node(NODE_LINUX_CPU) {
ws('workspace/ut-julia10-cpu') {
ws('workspace/ut-it-julia10-cpu') {
timeout(time: max_time, unit: 'MINUTES') {
utils.unpack_and_init('cpu', mx_lib)
utils.docker_run('ubuntu_cpu', 'unittest_ubuntu_cpu_julia10', false)
1 change: 1 addition & 0 deletions docs/build_version_doc/setup_docs_ubuntu.sh
@@ -64,6 +64,7 @@ echo "Installing Clojure dependencies..."
wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein
chmod 775 lein
sudo cp lein /usr/local/bin
echo "Y" | sudo lein downgrade 2.8.3


echo "Installing R dependencies..."
24 changes: 15 additions & 9 deletions example/bayesian-methods/algos.py
@@ -14,13 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create implementation of algorithms of HMC, stepHMC, SGD, SGLD and DistilledSGLD"""
from __future__ import print_function
import time
import numpy
import mxnet as mx
import mxnet.ndarray as nd
import time
import logging
from utils import *
from utils import copy_param, get_executor, sample_test_regression, sample_test_acc


def calc_potential(exe, params, label_name, noise_precision, prior_precision):
@@ -35,6 +35,7 @@ def calc_potential(exe, params, label_name, noise_precision, prior_precision):


def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
"""Calculate gradient"""
exe.copy_params_from(params)
exe.arg_dict['data'][:] = X
if outgrad_f is None:
@@ -48,8 +48,8 @@ def calc_grad(exe, exe_grads, params, X, Y, label_name=None, outgrad_f=None):
v.wait_to_read()


def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
eps=1E-6):
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10, eps=1E-6):
"""Generate the implementation of step HMC"""
init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
@@ -102,6 +103,7 @@ def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_preci
def HMC(sym, data_inputs, X, Y, X_test, Y_test, sample_num,
initializer=None, noise_precision=1 / 9.0, prior_precision=0.1,
learning_rate=1E-6, L=10, dev=mx.gpu()):
"""Generate the implementation of HMC"""
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, exe_params, exe_grads, _ = get_executor(sym, dev, data_inputs, initializer)
exe.arg_dict['data'][:] = X
@@ -134,6 +136,7 @@ def SGD(sym, data_inputs, X, Y, X_test, Y_test, total_iter_num,
out_grad_f=None,
initializer=None,
minibatch_size=100, dev=mx.gpu()):
"""Generate the implementation of SGD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -173,6 +176,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
initializer=None,
minibatch_size=100, thin_interval=100, burn_in_iter_num=1000, task='classification',
dev=mx.gpu()):
"""Generate the implementation of SGLD"""
if out_grad_f is None:
label_key = list(set(data_inputs.keys()) - set(['data']))[0]
exe, params, params_grad, _ = get_executor(sym, dev, data_inputs, initializer)
@@ -200,7 +204,7 @@ def SGLD(sym, X, Y, X_test, Y_test, total_iter_num,
if i < burn_in_iter_num:
continue
else:
if 0 == (i - burn_in_iter_num) % thin_interval:
if (i - burn_in_iter_num) % thin_interval == 0:
if optimizer.lr_scheduler is not None:
lr = optimizer.lr_scheduler(optimizer.num_update)
else:
@@ -238,6 +242,7 @@ def DistilledSGLD(teacher_sym, student_sym,
minibatch_size=100,
task='classification',
dev=mx.gpu()):
"""Generate the implementation of DistilledSGLD"""
teacher_exe, teacher_params, teacher_params_grad, _ = \
get_executor(teacher_sym, dev, teacher_data_inputs, teacher_initializer)
student_exe, student_params, student_params_grad, _ = \
@@ -323,13 +328,14 @@ def DistilledSGLD(teacher_sym, student_sym,
sample_test_acc(teacher_exe, X=X, Y=Y, label_num=10,
minibatch_size=minibatch_size)
print("Student: Test ACC %d/%d=%f, Train ACC %d/%d=%f" % (test_correct, test_total,
test_acc, train_correct, train_total, train_acc))
test_acc, train_correct,
train_total, train_acc))
print("Teacher: Test ACC %d/%d=%f, Train ACC %d/%d=%f" \
% (teacher_test_correct, teacher_test_total, teacher_test_acc,
teacher_train_correct, teacher_train_total, teacher_train_acc))
else:
print("Current Iter Num: %d" % (i + 1), "Time Spent: %f" % (end - start), "MSE:",
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
sample_test_regression(exe=student_exe, X=X_test, Y=Y_test,
minibatch_size=minibatch_size,
save_path='regression_DSGLD.txt'))
start = time.time()
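Note on the thinning change above: the SGLD hunk replaces the Yoda-style test 0 == (i - burn_in_iter_num) % thin_interval with the conventional ordering. For readers new to the pattern, here is a minimal, self-contained sketch of the burn-in plus thinning loop in isolation; collect_samples and draw_sample are illustrative stand-ins, not functions from this commit.

    def collect_samples(draw_sample, total_iter_num=50000,
                        burn_in_iter_num=1000, thin_interval=10):
        """Discard the first burn_in_iter_num draws, then keep every
        thin_interval-th draw to reduce autocorrelation between samples."""
        kept = []
        for i in range(total_iter_num):
            sample = draw_sample(i)          # one SGLD/HMC update step
            if i < burn_in_iter_num:
                continue                     # still inside the burn-in phase
            if (i - burn_in_iter_num) % thin_interval == 0:
                kept.append(sample)          # same condition as in SGLD() above
        return kept
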
101 changes: 57 additions & 44 deletions example/bayesian-methods/bdk_demo.py
@@ -14,21 +14,21 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Run Stochastic Gradient Langevin Dynamics (SGLD) and Bayesian Dark Knowledge (BDK)"""
from __future__ import print_function
import mxnet as mx
import mxnet.ndarray as nd
import argparse
import time
import numpy
import logging
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
import argparse
from algos import *
from data_loader import *
from utils import *
import mxnet as mx
import mxnet.ndarray as nd
from algos import HMC, SGD, SGLD, DistilledSGLD
from data_loader import load_mnist, load_toy, load_synthetic
from utils import BiasXavier, SGLDScheduler


class CrossEntropySoftmax(mx.operator.NumpyOp):
"""Calculate CrossEntropy softmax function"""
def __init__(self):
super(CrossEntropySoftmax, self).__init__(False)

@@ -58,6 +58,7 @@ def backward(self, out_grad, in_data, out_data, in_grad):


class LogSoftmax(mx.operator.NumpyOp):
"""Generate helper functions to evaluate softmax loss function"""
def __init__(self):
super(LogSoftmax, self).__init__(False)

@@ -103,6 +104,7 @@ def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precisi


def get_mnist_sym(output_op=None, num_hidden=400):
"""Get symbol of mnist"""
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='mnist_fc1', num_hidden=num_hidden)
net = mx.symbol.Activation(data=net, name='mnist_relu1', act_type="relu")
@@ -117,6 +119,7 @@ def get_mnist_sym(output_op=None, num_hidden=400):


def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None):
"""Get synthetic gradient value"""
if grad is None:
grad = nd.empty(theta.shape, theta.context)
theta1 = theta.asnumpy()[0]
@@ -128,17 +131,16 @@ def synthetic_grad(X, theta, sigma1, sigma2, sigmax, rescale_grad=1.0, grad=None
-(X - theta1 - theta2) ** 2 / (2 * vx))
grad_npy = numpy.zeros(theta.shape)
grad_npy[0] = -rescale_grad * ((numpy.exp(-(X - theta1) ** 2 / (2 * vx)) * (X - theta1) / vx
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) * (
X - theta1 - theta2) / vx) / denominator).sum() \
+ theta2 / v2
+ numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta1 / v1
grad_npy[1] = -rescale_grad * ((numpy.exp(-(X - theta1 - theta2) ** 2 / (2 * vx)) *
(X - theta1 - theta2) / vx) / denominator).sum() + theta2 / v2
grad[:] = grad_npy
return grad


def get_toy_sym(teacher=True, teacher_noise_precision=None):
"""Get toy symbol"""
if teacher:
net = mx.symbol.Variable('data')
net = mx.symbol.FullyConnected(data=net, name='teacher_fc1', num_hidden=100)
@@ -160,8 +162,9 @@ def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()


def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)

def run_mnist_SGD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -175,8 +178,8 @@ def run_mnist_SGD(training_num=50000, gpu_id=None):
lr=5E-6, prior_precision=1.0, minibatch_size=100)


def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_SGLD(num_training=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
@@ -191,10 +194,11 @@ def run_mnist_SGLD(training_num=50000, gpu_id=None):
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
def run_mnist_DistilledSGLD(num_training=50000, gpu_id=None):
"""Run DistilledSGLD on mnist dataset"""
X, Y, X_test, Y_test = load_mnist(num_training)
minibatch_size = 100
if training_num >= 10000:
if num_training >= 10000:
num_hidden = 800
total_iter_num = 1000000
teacher_learning_rate = 1E-6
@@ -235,6 +239,7 @@ def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):


def run_toy_SGLD(gpu_id=None):
"""Run SGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
@@ -243,20 +248,26 @@ def run_toy_SGLD(gpu_id=None):
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev(gpu_id))


def run_toy_DistilledSGLD(gpu_id=None):
exe, params, _ = SGLD(sym=net,
data_inputs=data_inputs,
X=X,
Y=Y,
X_test=X_test,
Y_test=Y_test,
total_iter_num=50000,
initializer=initializer,
learning_rate=1E-4,
# lr_scheduler=mx.lr_scheduler.FactorScheduler(100000, 0.5),
prior_precision=0.1,
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size,
dev=dev(gpu_id)) # disable=unbalanced-tuple-unpacking


def run_toy_DistilledSGLD(gpu_id):
"""Run DistilledSGLD on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
@@ -288,6 +299,7 @@ def run_toy_DistilledSGLD(gpu_id=None):


def run_toy_HMC(gpu_id=None):
"""Run HMC on toy dataset"""
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
@@ -302,6 +314,7 @@ def run_toy_HMC(gpu_id=None):


def run_synthetic_SGLD():
"""Run synthetic SGLD"""
theta1 = 0
theta2 = 1
sigma1 = numpy.sqrt(10)
@@ -322,14 +335,14 @@ def run_synthetic_SGLD():
grad = nd.empty((2,), mx.cpu())
samples = numpy.zeros((2, total_iter_num))
start = time.time()
for i in xrange(total_iter_num):
for i in range(total_iter_num):
if (i + 1) % 100000 == 0:
end = time.time()
print("Iter:%d, Time spent: %f" % (i + 1, end - start))
start = time.time()
ind = numpy.random.randint(0, X.shape[0])
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax, rescale_grad=
X.shape[0] / float(minibatch_size), grad=grad)
synthetic_grad(X[ind], theta, sigma1, sigma2, sigmax,
rescale_grad=X.shape[0] / float(minibatch_size), grad=grad)
updater('theta', grad, theta)
samples[:, i] = theta.asnumpy()
plt.hist2d(samples[0, :], samples[1, :], (200, 200), cmap=plt.cm.jet)
@@ -354,18 +367,18 @@ def run_synthetic_SGLD():
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
if args.algorithm == 0:
run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
elif args.algorithm == 1:
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
if args.algorithm == 1:
run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
elif args.algorithm == 2:
run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
elif args.algorithm == 3:
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
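
The synthetic experiment in run_synthetic_SGLD estimates the posterior over (theta1, theta2) of a Gaussian mixture by calling synthetic_grad and then updater('theta', grad, theta) on every iteration. As a rough reference, the sketch below shows the textbook SGLD update such an updater is expected to perform; the function name and the exact noise scaling are assumptions made for illustration, not MXNet's optimizer code.

    import numpy as np

    def sgld_step(theta, grad, eps, rng=np.random):
        # One Stochastic Gradient Langevin Dynamics update on a NumPy array:
        # theta <- theta - (eps / 2) * grad + noise, with noise ~ N(0, eps),
        # where grad is the stochastic gradient of the negative log posterior.
        noise = rng.normal(0.0, np.sqrt(eps), size=theta.shape)
        return theta - 0.5 * eps * grad + noise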
5 changes: 3 additions & 2 deletions example/bayesian-methods/data_loader.py
@@ -14,14 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""Create helper functions to load mnist dataset and toy dataset"""
from __future__ import print_function
import numpy
import os
import ssl
import numpy


def load_mnist(training_num=50000):
"""Load mnist dataset"""
data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), 'mnist.npz')
if not os.path.isfile(data_path):
from six.moves import urllib
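The visible part of load_mnist checks for a cached mnist.npz next to the script and downloads it on first use; the train/validation split happens outside the shown hunk. A stripped-down sketch of that download-if-missing pattern follows, with the URL left as a parameter; the helper name and the unverified-SSL detail (suggested by the ssl import) are assumptions, not a copy of the repository function.

    import os
    import ssl
    import numpy
    from six.moves import urllib

    def load_npz_dataset(url, filename='mnist.npz'):
        data_path = os.path.join(os.path.dirname(os.path.realpath('__file__')), filename)
        if not os.path.isfile(data_path):
            # Assumed reason for the ssl import: some mirrors need an
            # unverified context for the one-off download.
            context = ssl._create_unverified_context()
            payload = urllib.request.urlopen(url, context=context).read()
            with open(data_path, 'wb') as out:
                out.write(payload)
        return numpy.load(data_path)  # dict-like access to the stored arrays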