diff --git a/example/bayesian-methods/README.md b/example/bayesian-methods/README.md index ec9e8be86927..fc35b94219d7 100644 --- a/example/bayesian-methods/README.md +++ b/example/bayesian-methods/README.md @@ -11,3 +11,27 @@ and *Bayesian Dark Knowledge (BDK)* [(Balan, Rathod, Murphy and Welling, 2 **bdk.ipynb** shows how to use MXNet to implement the DistilledSGLD algorithm in Bayesian Dark Knowledge. **bdk_demo.py** contains scripts (more than the notebook) related to Bayesian Dark Knowledge. Use `python bdk_demo.py -d 1 -l 2 -t 50000` to run classification on MNIST. + +View parameters we can use with the following command. + +```shell +python bdk_demo.py -h + + +usage: bdk_demo.py [-h] [-d DATASET] [-l ALGORITHM] [-t TRAINING] [--gpu GPU] + +Examples in the paper [NIPS2015]Bayesian Dark Knowledge and [ICML2011]Bayesian +Learning via Stochastic Gradient Langevin Dynamics + +optional arguments: + -h, --help show this help message and exit + -d DATASET, --dataset DATASET + Dataset to use. 0 --> TOY, 1 --> MNIST, 2 --> + Synthetic Data in the SGLD paper + -l ALGORITHM, --algorithm ALGORITHM + Type of algorithm to use. 
0 --> SGD, 1 --> SGLD, + other-->DistilledSGLD + -t TRAINING, --training TRAINING + Number of training samples + --gpu GPU 0 to use GPU, not set to use CPU +``` diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py index 145dac10e2a6..cd39bfd2a7c9 100644 --- a/example/bayesian-methods/bdk_demo.py +++ b/example/bayesian-methods/bdk_demo.py @@ -156,34 +156,34 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None): return net -def dev(): - return mx.gpu() +def dev(gpu_id=None): + return mx.gpu(gpu_id) if gpu_id is not None else mx.cpu()  # compare with None: GPU id 0 is falsy but valid -def run_mnist_SGD(training_num=50000): +def run_mnist_SGD(training_num=50000, gpu_id=None): X, Y, X_test, Y_test = load_mnist(training_num) minibatch_size = 100 net = get_mnist_sym() data_shape = (minibatch_size,) + X.shape[1::] - data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} + data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))} initializer = mx.init.Xavier(factor_type="in", magnitude=2.34) - exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y, + exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=1000000, initializer=initializer, lr=5E-6, prior_precision=1.0, minibatch_size=100) -def run_mnist_SGLD(training_num=50000): +def run_mnist_SGLD(training_num=50000, gpu_id=None): X, Y, X_test, Y_test = load_mnist(training_num) minibatch_size = 100 net = get_mnist_sym() data_shape = (minibatch_size,) + X.shape[1::] - data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} + data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))} initializer = mx.init.Xavier(factor_type="in", magnitude=2.34) - exe, sample_pool = SGLD(sym=net, dev=dev(), 
data_inputs=data_inputs, X=X, Y=Y, + exe, sample_pool = SGLD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test, total_iter_num=1000000, initializer=initializer, @@ -191,7 +191,7 @@ def run_mnist_SGLD(training_num=50000): thin_interval=100, burn_in_iter_num=1000) -def run_mnist_DistilledSGLD(training_num=50000): +def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None): X, Y, X_test, Y_test = load_mnist(training_num) minibatch_size = 100 if training_num >= 10000: @@ -214,10 +214,10 @@ def run_mnist_DistilledSGLD(training_num=50000): logsoftmax = LogSoftmax() student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden) data_shape = (minibatch_size,) + X.shape[1::] - teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())} - student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())} + teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))} + student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))} teacher_initializer = BiasXavier(factor_type="in", magnitude=1) student_initializer = BiasXavier(factor_type="in", magnitude=1) student_exe, student_params, _ = \ @@ -231,17 +231,17 @@ def run_mnist_DistilledSGLD(training_num=50000): teacher_learning_rate=teacher_learning_rate, student_learning_rate=student_learning_rate, teacher_prior_precision=teacher_prior, student_prior_precision=student_prior, - perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev()) + perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(gpu_id)) -def run_toy_SGLD(): +def run_toy_SGLD(gpu_id=None): X, Y, X_test, Y_test = load_toy() minibatch_size = 1 teacher_noise_precision = 1.0 / 9.0 net = get_toy_sym(True, teacher_noise_precision) 
data_shape = (minibatch_size,) + X.shape[1::] - data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} + data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))} initializer = mx.init.Uniform(0.07) exe, params, _ = \ SGLD(sym=net, data_inputs=data_inputs, @@ -253,20 +253,20 @@ def run_toy_SGLD(): burn_in_iter_num=1000, thin_interval=10, task='regression', - minibatch_size=minibatch_size, dev=dev()) + minibatch_size=minibatch_size, dev=dev(gpu_id)) -def run_toy_DistilledSGLD(): +def run_toy_DistilledSGLD(gpu_id=None): X, Y, X_test, Y_test = load_toy() minibatch_size = 1 teacher_noise_precision = 1.0 teacher_net = get_toy_sym(True, teacher_noise_precision) student_net = get_toy_sym(False) data_shape = (minibatch_size,) + X.shape[1::] - teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} - student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())} - # 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())} + teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))} + student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id))} + teacher_initializer = mx.init.Uniform(0.07) student_initializer = mx.init.Uniform(0.07) student_grad_f = lambda student_outputs, teacher_pred: \ @@ -284,21 +284,21 @@ def run_toy_DistilledSGLD(): student_grad_f=student_grad_f, teacher_prior_precision=0.1, student_prior_precision=0.001, perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression', - dev=dev()) + dev=dev(gpu_id)) -def run_toy_HMC(): +def run_toy_HMC(gpu_id=None): X, Y, X_test, Y_test = load_toy() minibatch_size = Y.shape[0] noise_precision = 1 / 9.0 net = get_toy_sym(True, noise_precision) data_shape = (minibatch_size,) + X.shape[1::] - 
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()), - 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())} + data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)), + 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))} initializer = mx.init.Uniform(0.07) sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test, sample_num=300000, initializer=initializer, prior_precision=1.0, - learning_rate=1E-3, L=10, dev=dev()) + learning_rate=1E-3, L=10, dev=dev(gpu_id)) def run_synthetic_SGLD(): @@ -350,21 +350,22 @@ def run_synthetic_SGLD(): help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD") parser.add_argument("-t", "--training", type=int, default=50000, help="Number of training samples") + parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU") args = parser.parse_args() training_num = args.training if args.dataset == 1: if 0 == args.algorithm: - run_mnist_SGD(training_num) + run_mnist_SGD(training_num, gpu_id=args.gpu) elif 1 == args.algorithm: - run_mnist_SGLD(training_num) + run_mnist_SGLD(training_num, gpu_id=args.gpu) else: - run_mnist_DistilledSGLD(training_num) + run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu) elif args.dataset == 0: if 1 == args.algorithm: - run_toy_SGLD() + run_toy_SGLD(gpu_id=args.gpu) elif 2 == args.algorithm: - run_toy_DistilledSGLD() + run_toy_DistilledSGLD(gpu_id=args.gpu) elif 3 == args.algorithm: - run_toy_HMC() + run_toy_HMC(gpu_id=args.gpu) else: run_synthetic_SGLD() diff --git a/example/fcn-xs/README.md b/example/fcn-xs/README.md index 145aa31cb700..49c57fc08eaf 100644 --- a/example/fcn-xs/README.md +++ b/example/fcn-xs/README.md @@ -40,14 +40,33 @@ this is the fully convolution style of the origin Once you completed all these steps, your working directory should contain a ```.\VOC2012``` directory, which contains the following: ```JPEGImages folder```, ```SegmentationClass folder```, ```train.lst```, 
```val.lst``` #### Step 3: Train the fcn-xs model -* Based on your hardware, configure GPU or CPU for training in `fcn_xs.py`. It is recommended to use GPU due to the computational complexity and data load. -```python -# ctx = mx.cpu(0) -ctx = mx.gpu(0) +* Based on your hardware, configure CPU or GPU for training by parameter ```--gpu```. It is recommended to use GPU due to the computational complexity and data load. +View parameters we can use with the following command. +```shell +python fcn_xs.py -h + + +usage: fcn_xs.py [-h] [--model MODEL] [--prefix PREFIX] [--epoch EPOCH] + [--init-type INIT_TYPE] [--retrain] [--gpu GPU] + +Convert vgg16 model to vgg16fc model. + +optional arguments: + -h, --help show this help message and exit + --model MODEL The type of fcn-xs model, e.g. fcnxs, fcn16s, fcn8s. + --prefix PREFIX The prefix(include path) of vgg16 model with mxnet + format. + --epoch EPOCH The epoch number of vgg16 model. + --init-type INIT_TYPE + the init type of fcn-xs model, e.g. vgg16, fcnxs + --retrain true means continue training. + --gpu GPU 0 to use GPU, not set to use CPU ``` + * It is recommended to train fcn-32s and fcn-16s before training the fcn-8s model To train the fcn-32s model, run the following: + ```shell python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16 ``` diff --git a/example/fcn-xs/fcn_xs.py b/example/fcn-xs/fcn_xs.py index 53244a1759c3..5b799f32e46e 100644 --- a/example/fcn-xs/fcn_xs.py +++ b/example/fcn-xs/fcn_xs.py @@ -28,9 +28,10 @@ logger = logging.getLogger() logger.setLevel(logging.INFO) -ctx = mx.gpu(0) + def main(): + ctx = mx.cpu() if args.gpu is None else mx.gpu(args.gpu)  # only an omitted --gpu (None) selects CPU; 0 is a valid GPU id fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536) fcnxs_model_prefix = "model_pascal/FCN32s_VGG16" if args.model == "fcn16s": @@ -85,6 +86,7 @@ def main(): help='the init type of fcn-xs model, e.g. 
vgg16, fcnxs') parser.add_argument('--retrain', action='store_true', default=False, help='true means continue training.') + parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU") args = parser.parse_args() logging.info(args) main() diff --git a/example/rcnn/README.md b/example/rcnn/README.md index b5284183d160..5e6127ccb08d 100644 --- a/example/rcnn/README.md +++ b/example/rcnn/README.md @@ -9,7 +9,7 @@ For a gluon imperative version, checkout https://github.com/dmlc/gluon-cv. ### Out-of-box inference models Download any of the following models to the current directory and run `python3 demo.py --dataset $Dataset$ --network $Network$ --params $MODEL_FILE$ --image $YOUR_IMAGE$` to get single image inference. -For example `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`, add `--gpu 0` to use GPU optionally. +For example `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`; add `--gpu 0` to run on GPU 0, or omit `--gpu` to run on CPU. Different network has different configuration. Different dataset has different object class names. You must pass them explicitly as command line arguments. | Network | Dataset | Imageset | Reference | Result | Link | diff --git a/example/rcnn/demo.py b/example/rcnn/demo.py index 2315bb8af366..b0a4ddbeab49 100644 --- a/example/rcnn/demo.py +++ b/example/rcnn/demo.py @@ -92,7 +92,7 @@ def parse_args(): parser.add_argument('--params', type=str, default='', help='path to trained model') parser.add_argument('--dataset', type=str, default='voc', help='training dataset') parser.add_argument('--image', type=str, default='', help='path to test image') - parser.add_argument('--gpu', type=str, default='', help='gpu device eg. 
0') + parser.add_argument('--gpu', type=str, default='', help='GPU devices, eg."0,1,2,3" , not set to use CPU.') parser.add_argument('--vis', action='store_true', help='display results') parser.add_argument('--vis-thresh', type=float, default=0.7, help='threshold display boxes') # faster rcnn params diff --git a/example/rcnn/test.py b/example/rcnn/test.py index 3c047d222016..e964c9080667 100644 --- a/example/rcnn/test.py +++ b/example/rcnn/test.py @@ -35,7 +35,7 @@ def test_net(sym, imdb, args): logger.info('called with args\n{}'.format(pprint.pformat(vars(args)))) # setup context - ctx = mx.gpu(args.gpu) + ctx = mx.cpu() if args.gpu is None else mx.gpu(args.gpu)  # None (flag omitted) selects CPU; 0 is a valid GPU id # load testing data test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side, @@ -94,7 +94,7 @@ def parse_args(): parser.add_argument('--params', type=str, default='', help='path to trained model') parser.add_argument('--dataset', type=str, default='voc', help='training dataset') parser.add_argument('--imageset', type=str, default='', help='imageset splits') - parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 
0') + parser.add_argument('--gpu', type=int, help='0 to use GPU, not set to use CPU') # faster rcnn params parser.add_argument('--img-short-side', type=int, default=600) parser.add_argument('--img-long-side', type=int, default=1000) diff --git a/example/rcnn/train.py b/example/rcnn/train.py index 0739069afb4a..7b1f2f7f31a5 100644 --- a/example/rcnn/train.py +++ b/example/rcnn/train.py @@ -33,7 +33,7 @@ def train_net(sym, roidb, args): logger.info('called with args\n{}'.format(pprint.pformat(vars(args)))) # setup multi-gpu - ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')] + ctx = [mx.cpu()] if not args.gpus else [mx.gpu(int(i)) for i in args.gpus.split(',')] batch_size = args.rcnn_batch_size * len(ctx) # load training data @@ -127,7 +127,7 @@ def parse_args(): parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model') parser.add_argument('--dataset', type=str, default='voc', help='training dataset') parser.add_argument('--imageset', type=str, default='', help='imageset splits') - parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1') + parser.add_argument('--gpus', type=str, help='GPU devices, eg: "0,1,2,3" , not set to use CPU') parser.add_argument('--epochs', type=int, default=10, help='training epochs') parser.add_argument('--lr', type=float, default=0.001, help='base learning rate') parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr')