diff --git a/example/quantization/README.md b/example/quantization/README.md
index b77537d4fba7..fc9a26755b4e 100644
--- a/example/quantization/README.md
+++ b/example/quantization/README.md
@@ -35,6 +35,7 @@ The following models have been tested on Linux systems.
|[ResNet152-V2](#8)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/resnet/152-layers/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|76.76%/93.03%|76.48%/92.96%|
|[Inception-BN](#9)|[MXNet ModelZoo](http://data.mxnet.io/models/imagenet/inception-bn/)|[Validation Dataset](http://data.mxnet.io/data/val_256_q90.rec)|72.09%/90.60%|72.00%/90.53%|
| [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | VOC2007/2012 | 0.8366 mAP | 0.8364 mAP |
+| [SSD-VGG16](#10) | [example/ssd](https://github.com/apache/incubator-mxnet/tree/master/example/ssd) | COCO2014 | 0.2552 mAP | 0.253 mAP |
ResNet50-V1
diff --git a/example/ssd/README.md b/example/ssd/README.md
index 6d4caa481bd7..92a125f1892d 100644
--- a/example/ssd/README.md
+++ b/example/ssd/README.md
@@ -42,6 +42,7 @@ remarkable traits of MXNet.
Due to permission issues, this example is maintained separately in this [repository](https://github.com/zhreshold/mxnet-ssd). Please use that repository's [issues](https://github.com/zhreshold/mxnet-ssd/issues) page for example-specific problems.
### What's new
+* Support training and inference on the COCO dataset. INT8 inference achieves 0.253 mAP on CPU with the MKL-DNN backend, comparable to FP32 accuracy (0.2552 mAP).
* Support uint8 inference on CPU with the MKL-DNN backend. Uint8 inference achieves 0.8364 mAP, comparable to FP32 accuracy (0.8366 mAP).
* Added live camera capture and detection display (run with --camera flag). Example:
`./demo.py --camera --cpu --frame-resize 0.5`
@@ -119,9 +120,9 @@ You can use `./demo.py --camera` to use a video capture device with opencv such
will open a window that will display the camera output together with the detections. You can play
with the detection threshold to get more or less detections.
-### Train the model
+### Train the model on VOC
* Note that we recommend using gluon-cv to train the model; please refer to [gluon-cv ssd](https://gluon-cv.mxnet.io/build/examples_detection/train_ssd_voc.html).
-This example only covers training on Pascal VOC dataset. Other datasets should
+This example only covers training on the Pascal VOC and MS COCO datasets. Other datasets should
be easily supported by adding subclass derived from class `Imdb` in `dataset/imdb.py`.
See example of `dataset/pascal_voc.py` for details.
* Download the converted pretrained `vgg16_reduced` model [here](https://github.com/zhreshold/mxnet-ssd/releases/download/v0.2-beta/vgg16_reduced.zip), unzip `.param` and `.json` files
@@ -166,16 +167,53 @@ Check `python train.py --help` for more training options. For example, if you ha
python train.py --gpus 0,1,2,3 --batch-size 32
```
+### Train the model on COCO
+* Download the COCO2014 dataset; skip this step if you already have it.
+```
+cd /path/to/where_you_store_datasets/
+wget http://images.cocodataset.org/zips/train2014.zip
+wget http://images.cocodataset.org/zips/val2014.zip
+wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip
+# Extract the data.
+unzip train2014.zip
+unzip val2014.zip
+unzip annotations_trainval2014.zip
+```
+* Following common practice, we use the `train2014,valminusminival2014` splits of COCO2014 for training and `minival2014` for evaluation.
+* Then link the `COCO2014` folder to `data/coco` (the default location):
+```
+ln -s /path/to/COCO2014 /path/to/incubator-mxnet/example/ssd/data/coco
+```
+Using a symbolic link instead of a copy saves some disk space.
+* Create packed binary files for faster training (a sketch for sanity-checking these files follows at the end of this section):
+```
+# cd /path/to/incubator-mxnet/example/ssd
+bash tools/prepare_coco.sh
+# or, if you are using Windows:
+python tools/prepare_dataset.py --dataset coco --set train2014,valminusminival2014 --target ./data/train.lst --root ./data/coco
+python tools/prepare_dataset.py --dataset coco --set minival2014 --target ./data/val.lst --root ./data/coco --no-shuffle
+```
+* Start training:
+```
+# cd /path/to/incubator-mxnet/example/ssd
+python train.py --label-width=560 --num-class=80 --class-names=./dataset/names/coco_label --pretrained="" --num-example=117265 --batch-size=64
+```
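+
+If you want to sanity-check the packed `.rec` files before a long training run, a minimal sketch like the following can load one batch (this is not part of the example scripts; the paths, batch size, and `label_width` are assumptions matching the commands above):
+```python
+import mxnet as mx
+
+# Load the record file produced by prepare_dataset.py and inspect one batch.
+train_iter = mx.io.ImageDetRecordIter(
+    path_imgrec='./data/train.rec',
+    batch_size=32,
+    data_shape=(3, 300, 300),  # SSD-VGG16 300x300 input
+    label_width=560)           # matches --label-width passed to train.py
+batch = train_iter.next()
+print(batch.data[0].shape, batch.label[0].shape)
+```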
+
### Evaluate the trained model
Make sure you have `val.rec` as the validation dataset; it's the same one used during training. Use:
```
# cd /path/to/incubator-mxnet/example/ssd
python evaluate.py --gpus 0,1 --batch-size 128 --epoch 0
+
+# Evaluate on COCO dataset
+python evaluate.py --gpus 0,1 --batch-size 128 --epoch 0 --num-class=80 --class-names=./dataset/names/mscoco.names
```
### Quantize model
-Follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model) to train a FP32 `SSD-VGG16_reduced_300x300` model based on Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Create `model` and `data` directories if they're not exist, extract the zip files, then rename the uncompressed files as follows (eg, rename `ssd-val-fc19a535.idx` to `val.idx`, `ssd-val-fc19a535.lst` to `val.lst`, `ssd-val-fc19a535.rec` to `val.rec`, `ssd_vgg16_reduced_300-dd479559.params` to `ssd_vgg16_reduced_300-0000.params`, `ssd_vgg16_reduced_300-symbol-dd479559.json` to `ssd_vgg16_reduced_300-symbol.json`.)
+To quantize a model trained on the VOC dataset, follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model-on-voc) to train an FP32 `SSD-VGG16_reduced_300x300` model on the Pascal VOC dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-dd479559.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd-val-fc19a535.zip). Create the `model` and `data` directories if they don't exist, extract the zip files, then rename the uncompressed files as follows (e.g., rename `ssd-val-fc19a535.idx` to `val.idx`, `ssd-val-fc19a535.lst` to `val.lst`, `ssd-val-fc19a535.rec` to `val.rec`, `ssd_vgg16_reduced_300-dd479559.params` to `ssd_vgg16_reduced_300-0000.params`, and `ssd_vgg16_reduced_300-symbol-dd479559.json` to `ssd_vgg16_reduced_300-symbol.json`).
+
+To quantize a model trained on the COCO dataset, follow the [Train instructions](https://github.com/apache/incubator-mxnet/tree/master/example/ssd#train-the-model-on-coco) to train an FP32 `SSD-VGG16_reduced_300x300` model on the COCO dataset. You can also download our [SSD-VGG16 pre-trained model](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_vgg16_reduced_300-7fedd4ad.zip) and [packed binary data](http://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/ssd_coco-val-e91096e8.zip). Create the `model` and `data` directories if they don't exist, extract the zip files, then rename the uncompressed files as follows (e.g., rename `ssd_coco-val-e91096e8.idx` to `val.idx`, `ssd_coco-val-e91096e8.lst` to `val.lst`, `ssd_coco-val-e91096e8.rec` to `val.rec`, `ssd_vgg16_reduced_300-7fedd4ad.params` to `ssd_vgg16_reduced_300-0000.params`, and `ssd_vgg16_reduced_300-symbol-7fedd4ad.json` to `ssd_vgg16_reduced_300-symbol.json`).
```
data/
@@ -199,12 +237,20 @@ After quantization, INT8 models will be saved in `model/` directory. Use the f
# USE MKLDNN AS SUBGRAPH BACKEND
export MXNET_SUBGRAPH_BACKEND=MKLDNN
-# Launch FP32 Inference
+# Launch FP32 Inference on VOC dataset
python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_
-# Launch INT8 Inference
+# Launch INT8 Inference on VOC dataset
python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_
+
+# Launch FP32 Inference on COCO dataset
+python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/ssd_ --num-class=80 --class-names=./dataset/names/mscoco.names
+
+# Launch INT8 Inference on COCO dataset
+python evaluate.py --cpu --num-batch 10 --batch-size 224 --deploy --prefix=./model/cqssd_ --num-class=80 --class-names=./dataset/names/mscoco.names
+
# Launch dummy data Inference
python benchmark_score.py --deploy --prefix=./model/ssd_
python benchmark_score.py --deploy --prefix=./model/cqssd_
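+
+If you drive evaluation from a Python script instead of the shell, the backend can be selected the same way. Note that `MXNET_SUBGRAPH_BACKEND` is read when the graph is bound, so it must be set before the model is loaded (a sketch equivalent to the `export` above):
+```python
+import os
+# Must be set before the symbol is bound so the MKLDNN subgraph pass runs.
+os.environ['MXNET_SUBGRAPH_BACKEND'] = 'MKLDNN'
+
+import mxnet as mx  # safe to import after the variable is set
+```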
diff --git a/example/ssd/dataset/mscoco.py b/example/ssd/dataset/mscoco.py
index 469a15ae2720..dbe6e6909f4d 100644
--- a/example/ssd/dataset/mscoco.py
+++ b/example/ssd/dataset/mscoco.py
@@ -97,6 +97,12 @@ def _load_all(self, anno_file, shuffle):
labels = []
coco = COCO(anno_file)
img_ids = coco.getImgIds()
+        # map COCO category ids to the contiguous class indices used by the example
+ cats = [cat['name'] for cat in coco.loadCats(coco.getCatIds())]
+ class_to_coco_ind = dict(zip(cats, coco.getCatIds()))
+ class_to_ind = dict(zip(self.classes, range(len(self.classes))))
+ coco_ind_to_class_ind = dict([(class_to_coco_ind[cls], class_to_ind[cls])
+                                      for cls in self.classes])
for img_id in img_ids:
# filename
image_info = coco.loadImgs(img_id)[0]
@@ -109,7 +115,7 @@ def _load_all(self, anno_file, shuffle):
annos = coco.loadAnns(anno_ids)
label = []
for anno in annos:
- cat_id = int(anno["category_id"])
+ cat_id = coco_ind_to_class_ind[anno['category_id']]
bbox = anno["bbox"]
assert len(bbox) == 4
xmin = float(bbox[0]) / width
@@ -123,7 +129,7 @@ def _load_all(self, anno_file, shuffle):
if shuffle:
import random
- indices = range(len(image_set_index))
+ indices = list(range(len(image_set_index)))
random.shuffle(indices)
image_set_index = [image_set_index[i] for i in indices]
labels = [labels[i] for i in indices]
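
The remapping added to `_load_all` above is needed because COCO category ids are not contiguous: the 80 classes use ids drawn from 1 to 90, while the training pipeline expects class indices in `[0, num_classes)`. A small illustration using `pycocotools` (the annotation path is an assumption, and unlike the code above, this simplified version maps ids by position rather than by class name):

```python
from pycocotools.coco import COCO

# COCO category ids are sparse; build a dense id -> index lookup.
coco = COCO('annotations/instances_val2014.json')
cat_ids = coco.getCatIds()                       # 80 ids drawn from 1..90
coco_id_to_index = {cid: i for i, cid in enumerate(cat_ids)}
print(len(cat_ids), min(cat_ids), max(cat_ids))  # 80 1 90
```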
diff --git a/example/ssd/dataset/names/mscoco.names b/example/ssd/dataset/names/mscoco.names
index ca76c80b5b2c..941cb4e13922 100644
--- a/example/ssd/dataset/names/mscoco.names
+++ b/example/ssd/dataset/names/mscoco.names
@@ -1,8 +1,8 @@
person
bicycle
car
-motorbike
-aeroplane
+motorcycle
+airplane
bus
train
truck
@@ -55,12 +55,12 @@ pizza
donut
cake
chair
-sofa
-pottedplant
+couch
+potted plant
bed
-diningtable
+dining table
toilet
-tvmonitor
+tv
laptop
mouse
remote
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_300.py b/example/ssd/symbol/legacy_vgg16_ssd_300.py
index 29fc30be65d4..0acac6e4294b 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_300.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_300.py
@@ -200,8 +200,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False,
loc_preds = net.get_internals()["multibox_loc_pred_output"]
anchor_boxes = net.get_internals()["multibox_anchors_output"]
- cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
- name='cls_prob')
+ cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob')
out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py b/example/ssd/symbol/legacy_vgg16_ssd_512.py
index c5c3095dfd77..74d6b37fc11e 100644
--- a/example/ssd/symbol/legacy_vgg16_ssd_512.py
+++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py
@@ -203,8 +203,7 @@ def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=40
loc_preds = net.get_internals()["multibox_loc_pred_output"]
anchor_boxes = net.get_internals()["multibox_anchors_output"]
- cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
- name='cls_prob')
+ cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob')
out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py
index 041f83eb44da..135c42e8be15 100644
--- a/example/ssd/symbol/symbol_builder.py
+++ b/example/ssd/symbol/symbol_builder.py
@@ -175,8 +175,7 @@ def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
- cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
- name='cls_prob')
+ cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob')
out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
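
The same one-line replacement appears in `legacy_vgg16_ssd_300.py`, `legacy_vgg16_ssd_512.py`, and `symbol_builder.py`: `SoftmaxActivation` with `mode='channel'` is deprecated, and `softmax` with `axis=1` computes the same per-anchor class probabilities along the channel axis. A quick numerical check (the shape is an assumption; `cls_preds` is `(batch, num_classes + 1, num_anchors)`):

```python
import mxnet as mx

# Confirm the deprecated operator and its replacement agree on axis 1.
x = mx.nd.random.uniform(shape=(2, 21, 8732))
a = mx.nd.SoftmaxActivation(x, mode='channel')
b = mx.nd.softmax(x, axis=1)
print(mx.nd.abs(a - b).max())  # expect ~0
```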
diff --git a/example/ssd/train.py b/example/ssd/train.py
index 09c618a96429..5965aeec6c7a 100755
--- a/example/ssd/train.py
+++ b/example/ssd/train.py
@@ -103,6 +103,8 @@ def parse_args():
help='use difficult ground-truths in evaluation')
parser.add_argument('--no-voc07', dest='use_voc07_metric', action='store_false',
help='dont use PASCAL VOC 07 11-point metric')
+ parser.add_argument('--kv-store', type=str, default='local',
+ help='key-value store type')
args = parser.parse_args()
return args
@@ -150,4 +152,5 @@ def parse_class_names(args):
force_nms=args.force_nms,
ovp_thresh=args.overlap_thresh,
use_difficult=args.use_difficult,
- voc07_metric=args.use_voc07_metric)
+ voc07_metric=args.use_voc07_metric,
+ kv_store=args.kv_store)
diff --git a/example/ssd/train/metric.py b/example/ssd/train/metric.py
index 731f8fcc19f4..eeb9796bf4a8 100644
--- a/example/ssd/train/metric.py
+++ b/example/ssd/train/metric.py
@@ -39,6 +39,17 @@ def reset(self):
self.num_inst = [0] * self.num
self.sum_metric = [0.0] * self.num
+ def reset_local(self):
+ """
+        override reset_local behavior
+ """
+ if getattr(self, 'num', None) is None:
+ self.num_inst = 0
+ self.sum_metric = 0.0
+ else:
+ self.num_inst = [0] * self.num
+ self.sum_metric = [0.0] * self.num
+
def update(self, labels, preds):
"""
Implementation of updating metrics
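
Newer versions of `mx.metric.EvalMetric` define `reset_local` (used, for example, by `Speedometer` with `auto_reset` to clear per-interval statistics while keeping global ones). The base implementation resets the scalar `num_inst`/`sum_metric`, which would clobber the list-valued state `MultiBoxMetric` keeps, hence the override above. Any custom metric that redefines its state in `reset` needs the same treatment; a hypothetical minimal example:

```python
import mxnet as mx

class CountMetric(mx.metric.EvalMetric):
    """Hypothetical metric counting updates; keeps reset/reset_local in sync."""
    def __init__(self):
        super(CountMetric, self).__init__('count')

    def update(self, labels, preds):
        self.sum_metric += 1.0
        self.num_inst += 1

    def reset(self):
        self.num_inst = 0
        self.sum_metric = 0.0

    # Mirror reset so per-interval logging sees consistent state.
    reset_local = reset
```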
diff --git a/example/ssd/train/train_net.py b/example/ssd/train/train_net.py
index 304a43b3d949..b37e3d5abcec 100644
--- a/example/ssd/train/train_net.py
+++ b/example/ssd/train/train_net.py
@@ -97,7 +97,7 @@ def train_net(net, train_path, num_classes, batch_size,
use_difficult=False, class_names=None,
voc07_metric=False, nms_topk=400, force_suppress=False,
train_list="", val_path="", val_list="", iter_monitor=0,
- monitor_pattern=".*", log_file=None):
+ monitor_pattern=".*", log_file=None, kv_store=None):
"""
Wrapper for training phase.
@@ -258,6 +258,9 @@ def train_net(net, train_path, num_classes, batch_size,
else:
valid_metric = MApMetric(ovp_thresh, use_difficult, class_names, pred_idx=3)
+    # create the kvstore if a type was specified on the command line
+ kv = mx.kvstore.create(kv_store) if kv_store else None
+
mod.fit(train_iter,
val_iter,
eval_metric=MultiBoxMetric(),
@@ -272,4 +275,5 @@ def train_net(net, train_path, num_classes, batch_size,
arg_params=args,
aux_params=auxs,
allow_missing=True,
- monitor=monitor)
+ monitor=monitor,
+ kvstore=kv)
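
The new `--kv-store` value is passed straight to `mx.kvstore.create`. These are the stock single-machine MXNet kvstore types (not specific to this example); `device` is usually preferable when training on multiple GPUs:

```python
import mxnet as mx

# 'local' aggregates gradients on the CPU (the default used by train.py);
# 'device' aggregates on GPU, which is usually faster for multi-GPU runs.
kv = mx.kvstore.create('local')
print(kv.type)  # local
# Distributed training would use 'dist_sync' or 'dist_async' with the launcher.
```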