This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Code modification for test cases of various network models in the example directory #12498

Merged: 25 commits, Jan 11, 2019
24 changes: 24 additions & 0 deletions example/bayesian-methods/README.md
@@ -11,3 +11,27 @@ and *Bayesian Dark Knowledge (BDK)* [<cite>(Balan, Rathod, Murphy and Welling, 2
**bdk.ipynb** shows how to use MXNet to implement the DistilledSGLD algorithm in Bayesian Dark Knowledge.

**bdk_demo.py** contains scripts (more than the notebook) related to Bayesian Dark Knowledge. Use `python bdk_demo.py -d 1 -l 2 -t 50000` to run classification on MNIST.

View the available parameters with the following command:

```shell
python bdk_demo.py -h


usage: bdk_demo.py [-h] [-d DATASET] [-l ALGORITHM] [-t TRAINING] [--gpu GPU]

Examples in the paper [NIPS2015]Bayesian Dark Knowledge and [ICML2011]Bayesian
Learning via Stochastic Gradient Langevin Dynamics

optional arguments:
-h, --help show this help message and exit
-d DATASET, --dataset DATASET
Dataset to use. 0 --> TOY, 1 --> MNIST, 2 -->
Synthetic Data in the SGLD paper
-l ALGORITHM, --algorithm ALGORITHM
Type of algorithm to use. 0 --> SGD, 1 --> SGLD,
other-->DistilledSGLD
-t TRAINING, --training TRAINING
Number of training samples
--gpu GPU 0 to use GPU, not set to use CPU
```
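
For example, following the help text above, the MNIST run shown earlier selects the GPU by adding `--gpu`:

```shell
python bdk_demo.py -d 1 -l 2 -t 50000 --gpu 0
```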
73 changes: 37 additions & 36 deletions example/bayesian-methods/bdk_demo.py
@@ -156,42 +156,42 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None):
return net


def dev():
return mx.gpu()
def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id else mx.cpu()


def run_mnist_SGD(training_num=50000):
def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
lr=5E-6, prior_precision=1.0, minibatch_size=100)


def run_mnist_SGLD(training_num=50000):
def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, sample_pool = SGLD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000):
def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
if training_num >= 10000:
@@ -214,10 +214,10 @@ def run_mnist_DistilledSGLD(training_num=50000):
logsoftmax = LogSoftmax()
student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))}
teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
student_initializer = BiasXavier(factor_type="in", magnitude=1)
student_exe, student_params, _ = \
@@ -231,17 +231,17 @@ def run_mnist_DistilledSGLD(training_num=50000):
teacher_learning_rate=teacher_learning_rate,
student_learning_rate=student_learning_rate,
teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(gpu_id))


def run_toy_SGLD():
def run_toy_SGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
net = get_toy_sym(True, teacher_noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
@@ -253,20 +253,20 @@ def run_toy_SGLD():
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev())
minibatch_size=minibatch_size, dev=dev(gpu_id))


def run_toy_DistilledSGLD():
def run_toy_DistilledSGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
teacher_net = get_toy_sym(True, teacher_noise_precision)
student_net = get_toy_sym(False)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())}
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id))}

teacher_initializer = mx.init.Uniform(0.07)
student_initializer = mx.init.Uniform(0.07)
student_grad_f = lambda student_outputs, teacher_pred: \
@@ -284,21 +284,21 @@ def run_toy_DistilledSGLD():
student_grad_f=student_grad_f,
teacher_prior_precision=0.1, student_prior_precision=0.001,
perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression',
dev=dev())
dev=dev(gpu_id))


def run_toy_HMC():
def run_toy_HMC(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
net = get_toy_sym(True, noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
sample_num=300000, initializer=initializer, prior_precision=1.0,
learning_rate=1E-3, L=10, dev=dev())
learning_rate=1E-3, L=10, dev=dev(gpu_id))


def run_synthetic_SGLD():
@@ -350,21 +350,22 @@ def run_synthetic_SGLD():
help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD")
parser.add_argument("-t", "--training", type=int, default=50000,
help="Number of training samples")
parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
run_mnist_SGD(training_num)
run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
run_mnist_SGLD(training_num)
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num)
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
run_toy_SGLD()
run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
run_toy_DistilledSGLD()
run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
run_toy_HMC()
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
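
One subtlety in the new `dev(gpu_id=None)` helper above: the expression `mx.gpu(gpu_id) if gpu_id else mx.cpu()` tests truthiness, so `--gpu 0` resolves to the CPU rather than GPU 0. A minimal sketch of a variant that distinguishes "not set" from device 0 (an illustration, not part of this change set):

```python
import mxnet as mx

def dev(gpu_id=None):
    # "is not None" keeps gpu_id=0 on the GPU; a plain truthiness
    # test would send device 0 to the CPU instead.
    return mx.gpu(gpu_id) if gpu_id is not None else mx.cpu()
```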
27 changes: 23 additions & 4 deletions example/fcn-xs/README.md
@@ -40,14 +40,33 @@ this is the fully convolution style of the origin
Once you have completed all these steps, your working directory should contain a ```.\VOC2012``` directory with the following: ```JPEGImages folder```, ```SegmentationClass folder```, ```train.lst```, ```val.lst```

#### Step 3: Train the fcn-xs model
* Based on your hardware, configure GPU or CPU for training in `fcn_xs.py`. It is recommended to use GPU due to the computational complexity and data load.
```python
# ctx = mx.cpu(0)
ctx = mx.gpu(0)
```
* Based on your hardware, configure CPU or GPU training with the ```--gpu``` parameter. It is recommended to use a GPU due to the computational complexity and data load.
View the available parameters with the following command:
```shell
python fcn_xs.py -h


usage: fcn_xs.py [-h] [--model MODEL] [--prefix PREFIX] [--epoch EPOCH]
[--init-type INIT_TYPE] [--retrain] [--gpu GPU]

Convert vgg16 model to vgg16fc model.

optional arguments:
-h, --help show this help message and exit
--model MODEL The type of fcn-xs model, e.g. fcnxs, fcn16s, fcn8s.
--prefix PREFIX The prefix(include path) of vgg16 model with mxnet
format.
--epoch EPOCH The epoch number of vgg16 model.
--init-type INIT_TYPE
the init type of fcn-xs model, e.g. vgg16, fcnxs
--retrain true means continue training.
--gpu GPU 0 to use GPU, not set to use CPU
```

* It is recommended to train fcn-32s and fcn-16s before training the fcn-8s model

To train the fcn-32s model, run the following:

```shell
python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16
```
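
Per the `--gpu` flag in the help output above, the same command can request a GPU, for example:

```shell
python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16 --gpu 0
```

Note that `fcn_xs.py` applies the same truthiness test to `args.gpu` as `bdk_demo.py`, so the device-0 caveat discussed after that diff applies here as well.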
4 changes: 3 additions & 1 deletion example/fcn-xs/fcn_xs.py
@@ -28,9 +28,10 @@

logger = logging.getLogger()
logger.setLevel(logging.INFO)
ctx = mx.gpu(0)


def main():
ctx = mx.cpu() if not args.gpu else mx.gpu(args.gpu)
fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN32s_VGG16"
if args.model == "fcn16s":
@@ -85,6 +86,7 @@ def main():
help='the init type of fcn-xs model, e.g. vgg16, fcnxs')
parser.add_argument('--retrain', action='store_true', default=False,
help='true means continue training.')
parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
logging.info(args)
main()
2 changes: 1 addition & 1 deletion example/rcnn/README.md
@@ -9,7 +9,7 @@ For a gluon imperative version, checkout https://github.com/dmlc/gluon-cv.

### Out-of-box inference models
Download any of the following models to the current directory and run `python3 demo.py --dataset $Dataset$ --network $Network$ --params $MODEL_FILE$ --image $YOUR_IMAGE$` to get single image inference.
For example `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`, add `--gpu 0` to use GPU optionally.
For example, `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`; add `--gpu 0` to use a GPU, or leave it unset to run on the CPU.
Different networks have different configurations, and different datasets have different object class names. You must pass them explicitly as command line arguments.

| Network | Dataset | Imageset | Reference | Result | Link |
2 changes: 1 addition & 1 deletion example/rcnn/demo.py
@@ -92,7 +92,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--image', type=str, default='', help='path to test image')
parser.add_argument('--gpu', type=str, default='', help='gpu device eg. 0')
parser.add_argument('--gpu', type=str, default='', help='GPU devices, e.g. "0,1,2,3"; not set to use CPU.')
parser.add_argument('--vis', action='store_true', help='display results')
parser.add_argument('--vis-thresh', type=float, default=0.7, help='threshold display boxes')
# faster rcnn params
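
Because `demo.py` keeps `--gpu` as a string with an empty default, "not set" (empty string) and device "0" remain distinguishable; a conversion along these lines (a sketch, not taken from `demo.py` itself) maps the flag to a context:

```python
import mxnet as mx

def ctx_from_flag(gpu=''):
    # An empty string means the flag was not set -> CPU,
    # while the string "0" is truthy and still maps to GPU 0.
    return mx.gpu(int(gpu)) if gpu else mx.cpu()
```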
4 changes: 2 additions & 2 deletions example/rcnn/test.py
@@ -35,7 +35,7 @@ def test_net(sym, imdb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup context
ctx = mx.gpu(args.gpu)
ctx = mx.cpu() if not args.gpu else mx.gpu(args.gpu)

# load testing data
test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side,
@@ -94,7 +94,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 0')
parser.add_argument('--gpu', type=int, default=0, help='0 to use GPU, not set to use CPU')
# faster rcnn params
parser.add_argument('--img-short-side', type=int, default=600)
parser.add_argument('--img-long-side', type=int, default=1000)
4 changes: 2 additions & 2 deletions example/rcnn/train.py
@@ -33,7 +33,7 @@ def train_net(sym, roidb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup multi-gpu
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
ctx = [mx.cpu()] if not args.gpus else [mx.gpu(int(i)) for i in args.gpus.split(',')]
batch_size = args.rcnn_batch_size * len(ctx)

# load training data
@@ -127,7 +127,7 @@ def parse_args():
parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1')
parser.add_argument('--gpus', type=str, help='GPU devices, e.g. "0,1,2,3"; not set to use CPU')
parser.add_argument('--epochs', type=int, default=10, help='training epochs')
parser.add_argument('--lr', type=float, default=0.001, help='base learning rate')
parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr')
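
The multi-GPU path above builds one context per comma-separated id in `--gpus`, falling back to a single CPU context when the flag is unset; a minimal standalone sketch of that pattern (illustrative, using a hypothetical `make_ctx` helper) looks like this:

```python
import mxnet as mx

def make_ctx(gpus=None):
    """Map a --gpus string such as "0,1" to a list of MXNet contexts."""
    # An empty or missing --gpus value falls back to a single CPU context,
    # mirroring the guard added in train.py above.
    if not gpus:
        return [mx.cpu()]
    return [mx.gpu(int(i)) for i in gpus.split(',')]

ctx = make_ctx("0,1")        # two GPU contexts
batch_size = 2 * len(ctx)    # e.g. rcnn_batch_size * number of devices
```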