diff --git a/example/quantization/README.md b/example/quantization/README.md index b77537d4fba7..fc9a26755b4e 100644 --- a/example/quantization/README.md +++ b/example/quantization/README.md @@ -35,6 +35,7 @@ The following models have been tested on Linux systems. |[ResNet152-V2](#8)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/resnet/152-layers/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.76%/93.03%|76.48%/92.96%| |[Inception-BN](#9)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/inception-bn/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|72.09%/90.60%|72.00%/90.53%| | [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | VOC2007/2012 | 0.8366 mAP | 0.8364 mAP | +| [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | COCO2014 | 0.2552 mAP | 0.253 mAP |

ResNet50-V1

diff --git a/example/ssd/README.md b/example/ssd/README.md index 6d4caa481bd7..92a125f1892d 100644 --- a/example/ssd/README.md +++ b/example/ssd/README.md @@ -42,6 +42,7 @@ remarkable traits of MXNet. Due to the permission issue, this example is maintained in this [repository](https://github.com/zhreshold/mxnet-ssd) separately. You can use the link regarding specific per example [issues](https://github.com/zhreshold/mxnet-ssd/issues). ### What's new +* Support training and inference on the COCO dataset. INT8 inference achieves 0.253 mAP on CPU with the MKL-DNN backend, which is comparable to the FP32 accuracy (0.2552 mAP). * Support uint8 inference on CPU with MKL-DNN backend. Uint8 inference achieves 0.8364 mAP, which is a comparable accuracy to FP32 (0.8366 mAP). * Added live camera capture and detection display (run with --camera flag). Example: `./demo.py --camera --cpu --frame-resize 0.5` @@ -119,9 +120,9 @@ You can use `./demo.py --camera` to use a video capture device with opencv such will open a window that will display the camera output together with the detections. You can play with the detection threshold to get more or less detections. -### Train the model +### Train the model on VOC * Note that we recommend to use gluon-cv to train the model, please refer to [gluon-cv ssd](https://gluon-cv.mxnet.io/build/examples_detection/train_ssd_voc.html). -This example only covers training on Pascal VOC dataset. Other datasets should +This example only covers training on the Pascal VOC or MS COCO datasets. Other datasets should be easily supported by adding subclass derived from class `Imdb` in `dataset/imdb.py`. See example of `dataset/pascal_voc.py` for details. * Download the converted pretrained `vgg16_reduced` model [here](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.2-beta/vgg16_reduced.zip), unzip `.param` and `.json` files @@ -166,16 +167,53 @@ Check `python train.py --help` for more training options. For example, if you have 4 GPUs: python train.py --gpus 0,1,2,3 --batch-size 32 ``` +### Train the model on COCO +* Download the COCO2014 dataset; skip this step if you already have it. +``` +cd /path/to/where_you_store_datasets/ +wget http://images.cocodataset.org/zips/train2014.zip +wget http://images.cocodataset.org/zips/val2014.zip +wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip +# Extract the data. +unzip train2014.zip +unzip val2014.zip +unzip annotations_trainval2014.zip +``` +* Following common practice, we use the `train2014,valminusminival2014` splits of COCO2014 for training and `minival2014` for evaluation. +* Then link the `COCO2014` folder to `data/coco`, the default location: +``` +ln -s /path/to/COCO2014 /path/to/incubator-mxnet/example/ssd/data/coco +``` +Using a symbolic link instead of a copy saves disk space. +* Create packed binary files for faster training: +``` +# cd /path/to/incubator-mxnet/example/ssd +bash tools/prepare_coco.sh +# or, if you are using Windows +python tools/prepare_dataset.py --dataset coco --set train2014,valminusminival2014 --target ./data/train.lst --root ./data/coco +python tools/prepare_dataset.py --dataset coco --set minival2014 --target ./data/val.lst --root ./data/coco --no-shuffle +``` +* Start training: +``` +# cd /path/to/incubator-mxnet/example/ssd +python train.py --label-width=560 --num-class=80 --class-names=./dataset/names/coco_label --pretrained="" --num-example=117265 --batch-size=64 +``` + ### Evaluate trained model Make sure you have val.rec as validation dataset. It's the same one as used in training.
Use: ``` # cd /path/to/incubator-mxnet/example/ssd python evaluate.py --gpus 0,1 --batch-size 128 --epoch 0 + +# Evaluate on COCO dataset +python evaluate.py --gpus 0,1 --batch-size 128 --epoch 0 --num-class=80 --class-names=./dataset/names/mscoco.names ``` ### Quantize model -Follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model) to train a FP32 `SSD-VGG16_reduced_300x300` model based on Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Create `model` and `data` directories if they're not exist, extract the zip files, then rename the uncompressed files as follows (eg, rename `ssd-val-fc19a535.idx` to `val.idx`, `ssd-val-fc19a535.lst` to `val.lst`, `ssd-val-fc19a535.rec` to `val.rec`, `ssd_vgg16_reduced_300-dd479559.params` to `ssd_vgg16_reduced_300-0000.params`, `ssd_vgg16_reduced_300-symbol-dd479559.json` to `ssd_vgg16_reduced_300-symbol.json`.) +To quantize a model on the VOC dataset, follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model-on-voc) to train an FP32 `SSD-VGG16_reduced_300x300` model on the Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Create the `model` and `data` directories if they don't exist, extract the zip files, then rename the uncompressed files as follows (e.g., rename `ssd-val-fc19a535.idx` to `val.idx`, `ssd-val-fc19a535.lst` to `val.lst`, `ssd-val-fc19a535.rec` to `val.rec`, `ssd_vgg16_reduced_300-dd479559.params` to `ssd_vgg16_reduced_300-0000.params`, and `ssd_vgg16_reduced_300-symbol-dd479559.json` to `ssd_vgg16_reduced_300-symbol.json`). + +To quantize a model on the COCO dataset, follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model-on-coco) to train an FP32 `SSD-VGG16_reduced_300x300` model on the COCO dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-7fedd4ad.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd_coco-val-e91096e8.zip). Create the `model` and `data` directories if they don't exist, extract the zip files, then rename the uncompressed files as follows (e.g., rename `ssd_coco-val-e91096e8.idx` to `val.idx`, `ssd_coco-val-e91096e8.lst` to `val.lst`, `ssd_coco-val-e91096e8.rec` to `val.rec`, `ssd_vgg16_reduced_300-7fedd4ad.params` to `ssd_vgg16_reduced_300-0000.params`, and `ssd_vgg16_reduced_300-symbol-7fedd4ad.json` to `ssd_vgg16_reduced_300-symbol.json`). ``` data/ @@ -199,12 +237,20 @@ After quantization, INT8 models will be saved in `model/` directory.
Use the following command to launch inference. # USE MKLDNN AS SUBGRAPH BACKEND export MXNET_SUBGRAPH_BACKEND=MKLDNN -# Launch FP32 Inference +# Launch FP32 Inference on VOC dataset python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_ -# Launch INT8 Inference +# Launch INT8 Inference on VOC dataset python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_ +# Launch FP32 Inference on COCO dataset + +python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_ --num-class=80 --class-names=./dataset/names/mscoco.names + +# Launch INT8 Inference on COCO dataset + +python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_ --num-class=80 --class-names=./dataset/names/mscoco.names + # Launch dummy data Inference python benchmark_score.py --deploy --prefix=./model/ssd_ python benchmark_score.py --deploy --prefix=./model/cqssd_ diff --git a/example/ssd/dataset/mscoco.py b/example/ssd/dataset/mscoco.py index 469a15ae2720..dbe6e6909f4d 100644 --- a/example/ssd/dataset/mscoco.py +++ b/example/ssd/dataset/mscoco.py @@ -97,6 +97,12 @@ def _load_all(self, anno_file, shuffle): labels = [] coco = COCO(anno_file) img_ids = coco.getImgIds() + # map COCO category ids to contiguous class indices via the class names + cats = [cat['name'] for cat in coco.loadCats(coco.getCatIds())] + class_to_coco_ind = dict(zip(cats, coco.getCatIds())) + class_to_ind = dict(zip(self.classes, range(len(self.classes)))) + coco_ind_to_class_ind = dict([(class_to_coco_ind[cls], class_to_ind[cls]) + for cls in self.classes]) for img_id in img_ids: # filename image_info = coco.loadImgs(img_id)[0] @@ -109,7 +115,7 @@ def _load_all(self, anno_file, shuffle): annos = coco.loadAnns(anno_ids) label = [] for anno in annos: - cat_id = int(anno["category_id"]) + cat_id = coco_ind_to_class_ind[anno['category_id']] bbox = anno["bbox"] assert len(bbox) == 4 xmin = float(bbox[0]) / width @@ -123,7 +129,7 @@ def _load_all(self, anno_file, shuffle): if shuffle: import random - indices = range(len(image_set_index)) + indices = list(range(len(image_set_index))) random.shuffle(indices) image_set_index = [image_set_index[i] for i in indices] labels = [labels[i] for i in indices] diff --git a/example/ssd/dataset/names/mscoco.names b/example/ssd/dataset/names/mscoco.names index ca76c80b5b2c..941cb4e13922 100644 --- a/example/ssd/dataset/names/mscoco.names +++ b/example/ssd/dataset/names/mscoco.names @@ -1,8 +1,8 @@ person bicycle car -motorbike -aeroplane +motorcycle +airplane bus train truck @@ -55,12 +55,12 @@ pizza donut cake chair -sofa -pottedplant +couch +potted plant bed -diningtable +dining table toilet -tvmonitor +tv laptop mouse remote diff --git a/example/ssd/symbol/legacy_vgg16_ssd_300.py b/example/ssd/symbol/legacy_vgg16_ssd_300.py index 29fc30be65d4..0acac6e4294b 100644 --- a/example/ssd/symbol/legacy_vgg16_ssd_300.py +++ b/example/ssd/symbol/legacy_vgg16_ssd_300.py @@ -200,8 +200,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, loc_preds = net.get_internals()["multibox_loc_pred_output"] anchor_boxes = net.get_internals()["multibox_anchors_output"] - cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \ - name='cls_prob') + cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob') out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py
b/example/ssd/symbol/legacy_vgg16_ssd_512.py index c5c3095dfd77..74d6b37fc11e 100644 --- a/example/ssd/symbol/legacy_vgg16_ssd_512.py +++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py @@ -203,8 +203,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400 loc_preds = net.get_internals()["multibox_loc_pred_output"] anchor_boxes = net.get_internals()["multibox_anchors_output"] - cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \ - name='cls_prob') + cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob') out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py index 041f83eb44da..135c42e8be15 100644 --- a/example/ssd/symbol/symbol_builder.py +++ b/example/ssd/symbol/symbol_builder.py @@ -175,8 +175,7 @@ def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios, num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) - cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \ - name='cls_prob') + cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob') out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) diff --git a/example/ssd/train.py b/example/ssd/train.py index 09c618a96429..5965aeec6c7a 100755 --- a/example/ssd/train.py +++ b/example/ssd/train.py @@ -103,6 +103,8 @@ def parse_args(): help='use difficult ground-truths in evaluation') parser.add_argument('--no-voc07', dest='use_voc07_metric', action='store_false', help='dont use PASCAL VOC 07 11-point metric') + parser.add_argument('--kv-store', type=str, default='local', + help='key-value store type') args = parser.parse_args() return args @@ -150,4 +152,5 @@ def parse_class_names(args): force_nms=args.force_nms, ovp_thresh=args.overlap_thresh, use_difficult=args.use_difficult, - voc07_metric=args.use_voc07_metric) + voc07_metric=args.use_voc07_metric, + kv_store=args.kv_store) diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py index 731f8fcc19f4..eeb9796bf4a8 100644 --- a/example/ssd/train/metric.py +++ b/example/ssd/train/metric.py @@ -39,6 +39,17 @@ def reset(self): self.num_inst = [0] * self.num self.sum_metric = [0.0] * self.num + def reset_local(self): + """ + override reset_local behavior + """ + if getattr(self, 'num', None) is None: + self.num_inst = 0 + self.sum_metric = 0.0 + else: + self.num_inst = [0] * self.num + self.sum_metric = [0.0] * self.num + def update(self, labels, preds): """ Implementation of updating metrics diff --git a/example/ssd/train/train_net.py b/example/ssd/train/train_net.py index 304a43b3d949..b37e3d5abcec 100644 --- a/example/ssd/train/train_net.py +++ b/example/ssd/train/train_net.py @@ -97,7 +97,7 @@ def train_net(net, train_path, num_classes, batch_size, use_difficult=False, class_names=None, voc07_metric=False, nms_topk=400, force_suppress=False, train_list="", val_path="", val_list="", iter_monitor=0, - monitor_pattern=".*", log_file=None): + monitor_pattern=".*", log_file=None, kv_store=None): """ Wrapper for training phase.
@@ -258,6 +258,9 @@ def train_net(net, train_path, num_classes, batch_size, else: valid_metric = MApMetric(ovp_thresh, use_difficult, class_names, pred_idx=3) + # create a kvstore if a store type was specified + kv = mx.kvstore.create(kv_store) if kv_store else None + mod.fit(train_iter, val_iter, eval_metric=MultiBoxMetric(), @@ -272,4 +275,5 @@ def train_net(net, train_path, num_classes, batch_size, arg_params=args, aux_params=auxs, allow_missing=True, - monitor=monitor) + monitor=monitor, + kvstore=kv)
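
Notes on the changes above:

* The `dataset/mscoco.py` fix matters because raw COCO `category_id` values are non-contiguous (the 80 classes are spread over ids 1-90), so using `int(anno["category_id"])` directly as a class index mislabels classes. The hunk remaps COCO ids to contiguous indices by class name. A minimal self-contained sketch of that mapping pattern, using hypothetical stand-in data in place of a real `COCO` annotation object:

```python
# Sketch of the remapping in dataset/mscoco.py; cat_ids, cats, and classes are
# hypothetical stand-ins for coco.getCatIds(), coco.loadCats(), and self.classes.
cat_ids = [1, 2, 3, 5, 9]  # non-contiguous, like real COCO category ids
cats = ['person', 'bicycle', 'car', 'airplane', 'boat']
classes = ['person', 'bicycle', 'car', 'airplane', 'boat']

class_to_coco_ind = dict(zip(cats, cat_ids))            # name -> COCO id
class_to_ind = dict(zip(classes, range(len(classes))))  # name -> contiguous index
coco_ind_to_class_ind = {class_to_coco_ind[cls]: class_to_ind[cls]
                         for cls in classes}

# COCO id 5 ('airplane') becomes contiguous class index 3, not 5.
assert coco_ind_to_class_ind[5] == 3
```

* The `SoftmaxActivation` -> `softmax` replacement in the three symbol files should be behavior-preserving: `SoftmaxActivation(mode='channel')` is the deprecated operator for normalizing over the channel axis, which `softmax(axis=1)` expresses directly. A quick NDArray check (shapes are illustrative only; 21 = 20 VOC classes plus background, 8732 anchors for SSD300):

```python
import mxnet as mx

# cls_preds in this example has shape (batch, num_classes + 1, num_anchors)
x = mx.nd.random.uniform(shape=(2, 21, 8732))
a = mx.nd.SoftmaxActivation(x, mode='channel')  # deprecated operator
b = mx.nd.softmax(x, axis=1)                    # replacement used in this patch
assert (a - b).abs().max().asscalar() < 1e-6
```

* With the new `--kv-store` flag (default `local`), multi-GPU runs can pass `--kv-store device` to keep gradient aggregation on the GPUs; `train_net` only creates a kvstore when a store type is given, so existing single-process behavior is unchanged.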