Code modification for test cases of various network models in directory example (#12498)

* example testcase modified

* rcnn file add

* license add

* license init

* CI test trigger

* rcnn modify give up

* trigger

* modify for better user experience

* change the default parameter to xpu=None

* Update bdk_demo.py

* Update fcn_xs.py

* Update test.py

* Update train.py

* Update bdk_demo.py

* Update bdk_demo.py

* modify review comments

* refine

* modify Readmes according to the changed code.

* finetune READMEs

* re-trigger ci

* re-trigger ci twice
luobao-intel authored and TaoLv committed Jan 11, 2019
1 parent 9c3253d commit a6ed619
Showing 8 changed files with 93 additions and 47 deletions.
24 changes: 24 additions & 0 deletions example/bayesian-methods/README.md
@@ -11,3 +11,27 @@ and *Bayesian Dark Knowledge (BDK)* [<cite>(Balan, Rathod, Murphy and Welling, 2
**bdk.ipynb** shows how to use MXNet to implement the DistilledSGLD algorithm in Bayesian Dark Knowledge.

**bdk_demo.py** contains scripts related to Bayesian Dark Knowledge (more than the notebook covers). Use `python bdk_demo.py -d 1 -l 2 -t 50000` to run classification on MNIST.

View the available parameters with the following command:

```shell
python bdk_demo.py -h


usage: bdk_demo.py [-h] [-d DATASET] [-l ALGORITHM] [-t TRAINING] [--gpu GPU]

Examples in the paper [NIPS2015]Bayesian Dark Knowledge and [ICML2011]Bayesian
Learning via Stochastic Gradient Langevin Dynamics

optional arguments:
-h, --help show this help message and exit
-d DATASET, --dataset DATASET
Dataset to use. 0 --> TOY, 1 --> MNIST, 2 -->
Synthetic Data in the SGLD paper
-l ALGORITHM, --algorithm ALGORITHM
Type of algorithm to use. 0 --> SGD, 1 --> SGLD,
other-->DistilledSGLD
-t TRAINING, --training TRAINING
Number of training samples
--gpu GPU ID of the GPU to use (e.g. 0); omit to run on CPU
```
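For example, `python bdk_demo.py -d 0 -l 1` runs SGLD on the toy dataset on the CPU, while `python bdk_demo.py -d 1 -l 2 -t 50000 --gpu 0` runs DistilledSGLD classification on MNIST on GPU 0.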
73 changes: 37 additions & 36 deletions example/bayesian-methods/bdk_demo.py
@@ -156,42 +156,42 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None):
return net


def dev():
return mx.gpu()
def dev(gpu_id=None):
return mx.gpu(gpu_id) if gpu_id is not None else mx.cpu()


def run_mnist_SGD(training_num=50000):
def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
lr=5E-6, prior_precision=1.0, minibatch_size=100)


def run_mnist_SGLD(training_num=50000):
def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
exe, sample_pool = SGLD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
learning_rate=4E-6, prior_precision=1.0, minibatch_size=100,
thin_interval=100, burn_in_iter_num=1000)


def run_mnist_DistilledSGLD(training_num=50000):
def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
if training_num >= 10000:
@@ -214,10 +214,10 @@ def run_mnist_DistilledSGLD(training_num=50000):
logsoftmax = LogSoftmax()
student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))}
teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
student_initializer = BiasXavier(factor_type="in", magnitude=1)
student_exe, student_params, _ = \
@@ -231,17 +231,17 @@ def run_mnist_DistilledSGLD(training_num=50000):
teacher_learning_rate=teacher_learning_rate,
student_learning_rate=student_learning_rate,
teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(gpu_id))


def run_toy_SGLD():
def run_toy_SGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
net = get_toy_sym(True, teacher_noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
@@ -253,20 +253,20 @@ def run_toy_SGLD():
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
minibatch_size=minibatch_size, dev=dev())
minibatch_size=minibatch_size, dev=dev(gpu_id))


def run_toy_DistilledSGLD():
def run_toy_DistilledSGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
teacher_net = get_toy_sym(True, teacher_noise_precision)
student_net = get_toy_sym(False)
data_shape = (minibatch_size,) + X.shape[1::]
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())}
# 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id))}

teacher_initializer = mx.init.Uniform(0.07)
student_initializer = mx.init.Uniform(0.07)
student_grad_f = lambda student_outputs, teacher_pred: \
@@ -284,21 +284,21 @@ def run_toy_DistilledSGLD():
student_grad_f=student_grad_f,
teacher_prior_precision=0.1, student_prior_precision=0.001,
perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression',
dev=dev())
dev=dev(gpu_id))


def run_toy_HMC():
def run_toy_HMC(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
net = get_toy_sym(True, noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
sample_num=300000, initializer=initializer, prior_precision=1.0,
learning_rate=1E-3, L=10, dev=dev())
learning_rate=1E-3, L=10, dev=dev(gpu_id))


def run_synthetic_SGLD():
@@ -350,21 +350,22 @@ def run_synthetic_SGLD():
help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD")
parser.add_argument("-t", "--training", type=int, default=50000,
help="Number of training samples")
parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
run_mnist_SGD(training_num)
run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
run_mnist_SGLD(training_num)
run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
run_mnist_DistilledSGLD(training_num)
run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
run_toy_SGLD()
run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
run_toy_DistilledSGLD()
run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
run_toy_HMC()
run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
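The change threads a single optional GPU id from the command line down to every `ctx=` allocation. A minimal, self-contained sketch of the pattern (illustrative names, not part of the diff; assumes MXNet 1.x as used by these examples):

```python
import argparse

import mxnet as mx
from mxnet import nd


def dev(gpu_id=None):
    # Compare against None: a bare `if gpu_id` would treat GPU 0 as
    # falsy and silently fall back to the CPU.
    return mx.gpu(gpu_id) if gpu_id is not None else mx.cpu()


parser = argparse.ArgumentParser()
parser.add_argument("--gpu", type=int, default=None,
                    help="ID of the GPU to use (e.g. 0); omit to run on CPU")
args = parser.parse_args()

# Every array the demo allocates then lands on the chosen context.
x = nd.zeros((100, 784), ctx=dev(args.gpu))
print(x.context)
```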
27 changes: 23 additions & 4 deletions example/fcn-xs/README.md
@@ -40,14 +40,33 @@ this is the fully convolution style of the origin
Once you have completed all these steps, your working directory should contain a ```.\VOC2012``` directory with the following: ```JPEGImages folder```, ```SegmentationClass folder```, ```train.lst```, ```val.lst```

#### Step 3: Train the fcn-xs model
* Based on your hardware, configure GPU or CPU for training in `fcn_xs.py`. It is recommended to use GPU due to the computational complexity and data load.
```python
# ctx = mx.cpu(0)
ctx = mx.gpu(0)
```
* Based on your hardware, select CPU or GPU training with the ```--gpu``` parameter. Using a GPU is recommended due to the computational complexity and data volume.
View the available parameters with the following command:
```shell
python fcn_xs.py -h


usage: fcn_xs.py [-h] [--model MODEL] [--prefix PREFIX] [--epoch EPOCH]
[--init-type INIT_TYPE] [--retrain] [--gpu GPU]

Convert vgg16 model to vgg16fc model.

optional arguments:
-h, --help show this help message and exit
--model MODEL The type of fcn-xs model, e.g. fcnxs, fcn16s, fcn8s.
--prefix PREFIX The prefix(include path) of vgg16 model with mxnet
format.
--epoch EPOCH The epoch number of vgg16 model.
--init-type INIT_TYPE
the init type of fcn-xs model, e.g. vgg16, fcnxs
--retrain true means continue training.
--gpu GPU ID of the GPU to use (e.g. 0); omit to run on CPU
```

* It is recommended to train the fcn-32s and fcn-16s models before training the fcn-8s model.

To train the fcn-32s model, run the following:

```shell
python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16
```
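Append `--gpu 0` to the command above to train on GPU 0; without the flag, the script runs on the CPU.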
4 changes: 3 additions & 1 deletion example/fcn-xs/fcn_xs.py
@@ -28,9 +28,10 @@

logger = logging.getLogger()
logger.setLevel(logging.INFO)
ctx = mx.gpu(0)


def main():
ctx = mx.gpu(args.gpu) if args.gpu is not None else mx.cpu()
fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN32s_VGG16"
if args.model == "fcn16s":
@@ -85,6 +86,7 @@ def main():
help='the init type of fcn-xs model, e.g. vgg16, fcnxs')
parser.add_argument('--retrain', action='store_true', default=False,
help='true means continue training.')
parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
logging.info(args)
main()
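Moving the context selection from module scope into `main()` also means that merely importing the script no longer allocates a GPU context; the device is chosen only when training actually starts, based on the `--gpu` argument.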
2 changes: 1 addition & 1 deletion example/rcnn/README.md
@@ -9,7 +9,7 @@ For a gluon imperative version, check out https://github.com/dmlc/gluon-cv.

### Out-of-box inference models
Download any of the following models to the current directory and run `python3 demo.py --dataset $Dataset$ --network $Network$ --params $MODEL_FILE$ --image $YOUR_IMAGE$` to get single image inference.
For example `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`, add `--gpu 0` to use GPU optionally.
For example, run `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`; add `--gpu 0` to run on GPU 0, or omit the flag to run on the CPU.
Different networks have different configurations, and different datasets have different object class names; you must pass them explicitly as command-line arguments.

| Network | Dataset | Imageset | Reference | Result | Link |
2 changes: 1 addition & 1 deletion example/rcnn/demo.py
@@ -92,7 +92,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--image', type=str, default='', help='path to test image')
parser.add_argument('--gpu', type=str, default='', help='gpu device eg. 0')
parser.add_argument('--gpu', type=str, default='', help='GPU devices, e.g. "0,1,2,3"; omit to run on CPU')
parser.add_argument('--vis', action='store_true', help='display results')
parser.add_argument('--vis-thresh', type=float, default=0.7, help='threshold display boxes')
# faster rcnn params
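Note that `demo.py` keeps `--gpu` as a string flag with an empty-string default, so a simple truthiness check can detect "not set" (the string "0" is truthy); the integer flags in `bdk_demo.py`, `fcn_xs.py`, and `test.py` must instead compare against `None`.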
4 changes: 2 additions & 2 deletions example/rcnn/test.py
@@ -35,7 +35,7 @@ def test_net(sym, imdb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup context
ctx = mx.gpu(args.gpu)
ctx = mx.gpu(args.gpu) if args.gpu is not None else mx.cpu()

# load testing data
test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side,
@@ -94,7 +94,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 0')
parser.add_argument('--gpu', type=int, default=None, help='ID of the GPU to use (e.g. 0); omit to run on CPU')
# faster rcnn params
parser.add_argument('--img-short-side', type=int, default=600)
parser.add_argument('--img-long-side', type=int, default=1000)
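Unlike `train.py` below, testing runs on a single context with batch size 1, so `--gpu` takes a single integer id rather than a comma-separated device list.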
4 changes: 2 additions & 2 deletions example/rcnn/train.py
@@ -33,7 +33,7 @@ def train_net(sym, roidb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

# setup multi-gpu
ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
ctx = [mx.cpu()] if not args.gpus else [mx.gpu(int(i)) for i in args.gpus.split(',')]
batch_size = args.rcnn_batch_size * len(ctx)

# load training data
@@ -127,7 +127,7 @@ def parse_args():
parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1')
parser.add_argument('--gpus', type=str, help='GPU devices, e.g. "0,1,2,3"; omit to run on CPU')
parser.add_argument('--epochs', type=int, default=10, help='training epochs')
parser.add_argument('--lr', type=float, default=0.001, help='base learning rate')
parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr')
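For multi-GPU training, the `--gpus` string is split into one context per device, and the effective batch size scales with the number of contexts. A small sketch of that logic (`batch_per_device` is an illustrative name standing in for `args.rcnn_batch_size`):

```python
import mxnet as mx


def train_contexts(gpus=None, batch_per_device=2):
    # "0,1,2,3" -> [mx.gpu(0), ..., mx.gpu(3)]; None or "" -> [mx.cpu()].
    # A truthiness test is safe here because "0" is a non-empty string.
    ctx = [mx.gpu(int(i)) for i in gpus.split(',')] if gpus else [mx.cpu()]
    return ctx, batch_per_device * len(ctx)


print(train_contexts("0,1"))  # ([gpu(0), gpu(1)], 4)
print(train_contexts(None))   # ([cpu(0)], 2)
```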
