diff --git a/Makefile b/Makefile index 4feac25cde85..f9b6bb62de9b 100644 --- a/Makefile +++ b/Makefile @@ -282,8 +282,7 @@ cpplint: --exclude_path src/operator/contrib/ctc_include pylint: -# ideally we want to check all, such as: python tools example tests - pylint python/mxnet --rcfile=$(ROOTDIR)/tests/ci_build/pylintrc + pylint python/mxnet tools/caffe_converter/*.py --rcfile=$(ROOTDIR)/tests/ci_build/pylintrc doc: docs diff --git a/tools/caffe_converter/caffe_parser.py b/tools/caffe_converter/caffe_parser.py index 9fa0c8804067..45efe4715f03 100644 --- a/tools/caffe_converter/caffe_parser.py +++ b/tools/caffe_converter/caffe_parser.py @@ -1,3 +1,5 @@ +"""Parse caffe's protobuf +""" import re try: import caffe diff --git a/tools/caffe_converter/convert_caffe_modelzoo.py b/tools/caffe_converter/convert_caffe_modelzoo.py index 8066aab1216d..f900a6cc7d06 100644 --- a/tools/caffe_converter/convert_caffe_modelzoo.py +++ b/tools/caffe_converter/convert_caffe_modelzoo.py @@ -1,12 +1,13 @@ +"""Convert Caffe's modelzoo +""" import os -import requests import argparse -import logging from convert_model import convert_model from convert_mean import convert_mean import mxnet as mx _mx_caffe_model = 'http://data.mxnet.io/models/imagenet/test/caffe/' + """Dictionary for model meta information For each model, it requires three attributes: @@ -21,6 +22,7 @@ - top-5-acc : top 5 accuracy for testing """ model_meta_info = { + # pylint: disable=line-too-long 'bvlc_alexnet' : { 'prototxt' : 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt', 'caffemodel' : 'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel', @@ -31,7 +33,7 @@ 'bvlc_googlenet' : { 'prototxt' : 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt', 'caffemodel' : 'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel', - 'mean' : (123,117,104), + 'mean' : (123, 117, 104), 'top-1-acc' : 0.687, 'top-5-acc' : 0.889 }, @@ -39,7 +41,7 @@ 'prototxt' : 'https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/c3ba00e272d9f48594acef1f67e5fd12aff7a806/VGG_ILSVRC_16_layers_deploy.prototxt', # 'caffemodel' : 'http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel', 'caffemodel' : 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_16_layers.caffemodel', - 'mean': (123.68,116.779,103.939), + 'mean': (123.68, 116.779, 103.939), 'top-1-acc' : 0.734, 'top-5-acc' : 0.914 }, @@ -47,7 +49,7 @@ 'prototxt' : 'https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt', # 'caffemodel' : 'http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel', 'caffemodel' : 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_19_layers.caffemodel', - 'mean' : (123.68,116.779,103.939), + 'mean' : (123.68, 116.779, 103.939), 'top-1-acc' : 0.731, 'top-5-acc' : 0.913 }, @@ -110,5 +112,5 @@ def convert_caffe_model(model_name, meta_info, dst_dir='./model'): parser.add_argument('model_name', help='can be '+', '.join(model_meta_info.keys())) args = parser.parse_args() assert args.model_name in model_meta_info, 'Unknown model ' + args.model_name - model_name, _ = convert_caffe_model(args.model_name, model_meta_info[args.model_name]) - print('Model is saved into '+model_name) + fname, _ = convert_caffe_model(args.model_name, model_meta_info[args.model_name]) + print('Model is saved into ' + fname) diff --git a/tools/caffe_converter/convert_mean.py b/tools/caffe_converter/convert_mean.py index 44f97f06321d..69cf50c65bd6 100644 --- a/tools/caffe_converter/convert_mean.py +++ b/tools/caffe_converter/convert_mean.py @@ -1,6 +1,8 @@ +"""Convert caffe mean +""" +import argparse import mxnet as mx import numpy as np -import argparse import caffe_parser def convert_mean(binaryproto_fname, output=None): @@ -27,7 +29,7 @@ def convert_mean(binaryproto_fname, output=None): mean_blob.channels, mean_blob.height, mean_blob.width ) # swap channels from Caffe BGR to RGB - img_mean_np[[0,2],:,:] = img_mean_np[[2,0],:,:] + img_mean_np[[0, 2], :, :] = img_mean_np[[2, 0], :, :] nd = mx.nd.array(img_mean_np) if output is not None: mx.nd.save(output, {"mean_image": nd}) diff --git a/tools/caffe_converter/convert_model.py b/tools/caffe_converter/convert_model.py index e612ea5106d9..e4134e272283 100644 --- a/tools/caffe_converter/convert_model.py +++ b/tools/caffe_converter/convert_model.py @@ -1,9 +1,11 @@ +"""Convert caffe model +""" from __future__ import print_function -import mxnet as mx -import numpy as np import argparse import sys import caffe_parser +import mxnet as mx +import numpy as np from convert_symbol import convert_symbol def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): @@ -32,7 +34,7 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): Input dimension """ sym, input_dim = convert_symbol(prototxt_fname) - arg_shapes, output_shapes, aux_shapes = sym.infer_shape(data=tuple(input_dim)) + arg_shapes, _, aux_shapes = sym.infer_shape(data=tuple(input_dim)) arg_names = sym.list_arguments() aux_names = sym.list_auxiliary_states() arg_shape_dic = dict(zip(arg_names, arg_shapes)) @@ -46,8 +48,8 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): layers_proto = caffe_parser.get_layers(caffe_parser.read_prototxt(prototxt_fname)) for layer_name, layer_type, layer_blobs in layer_iter: - if layer_type == 'Convolution' or layer_type == 'InnerProduct' or layer_type == 4 or layer_type == 14 \ - or layer_type == 'PReLU': + if layer_type == 'Convolution' or layer_type == 'InnerProduct' \ + or layer_type == 4 or layer_type == 14 or layer_type == 'PReLU': if layer_type == 'PReLU': assert (len(layer_blobs) == 1) wmat = layer_blobs[0].data @@ -60,7 +62,8 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): if len(layer_blobs[0].shape.dim) > 0: wmat_dim = layer_blobs[0].shape.dim else: - wmat_dim = [layer_blobs[0].num, layer_blobs[0].channels, layer_blobs[0].height, layer_blobs[0].width] + wmat_dim = [layer_blobs[0].num, layer_blobs[0].channels, + layer_blobs[0].height, layer_blobs[0].width] else: wmat_dim = list(layer_blobs[0].shape) wmat = np.array(layer_blobs[0].data).reshape(wmat_dim) @@ -72,7 +75,8 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): wmat[:, [0, 2], :, :] = wmat[:, [2, 0], :, :] assert(wmat.flags['C_CONTIGUOUS'] is True) - sys.stdout.write('converting layer {0}, wmat shape = {1}'.format(layer_name, wmat.shape)) + sys.stdout.write('converting layer {0}, wmat shape = {1}'.format( + layer_name, wmat.shape)) if len(layer_blobs) == 2: bias = np.array(layer_blobs[1].data) bias = bias.reshape((bias.shape[0], 1)) @@ -116,14 +120,15 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): assert gamma.flags['C_CONTIGUOUS'] is True assert beta.flags['C_CONTIGUOUS'] is True - print ('converting scale layer, beta shape = {}, gamma shape = {}'.format(beta.shape, gamma.shape)) + print('converting scale layer, beta shape = {}, gamma shape = {}'.format( + beta.shape, gamma.shape)) elif layer_type == 'BatchNorm': bn_name = layer_name mean = layer_blobs[0].data var = layer_blobs[1].data rescale_factor = layer_blobs[2].data if rescale_factor != 0: - rescale_factor = 1 / rescale_factor + rescale_factor = 1 / rescale_factor mean_name = '{}_moving_mean'.format(bn_name) var_name = '{}_moving_var'.format(bn_name) mean = mean.reshape(aux_shape_dic[mean_name]) @@ -132,21 +137,22 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): aux_params[var_name] = mx.nd.zeros(var.shape) # Get the original epsilon for idx, layer in enumerate(layers_proto): - if layer.name == bn_name: - bn_index = idx + if layer.name == bn_name: + bn_index = idx eps_caffe = layers_proto[bn_index].batch_norm_param.eps # Compensate for the epsilon shift performed in convert_symbol - eps_symbol = float( sym.attr_dict()[bn_name + '_moving_mean']['eps'] ) + eps_symbol = float(sym.attr_dict()[bn_name + '_moving_mean']['eps']) eps_correction = eps_caffe - eps_symbol # Fill parameters aux_params[mean_name][:] = mean * rescale_factor aux_params[var_name][:] = var * rescale_factor + eps_correction assert var.flags['C_CONTIGUOUS'] is True assert mean.flags['C_CONTIGUOUS'] is True - print ('converting batchnorm layer, mean shape = {}, var shape = {}'.format(mean.shape, var.shape)) + print('converting batchnorm layer, mean shape = {}, var shape = {}'.format( + mean.shape, var.shape)) else: assert len(layer_blobs) == 0 - print ('\tskipping layer {} of type {}'.format(layer_name, layer_type)) + print('\tskipping layer {} of type {}'.format(layer_name, layer_type)) if output_prefix is not None: model = mx.mod.Module(symbol=sym, label_names=['prob_label', ]) @@ -157,8 +163,8 @@ def convert_model(prototxt_fname, caffemodel_fname, output_prefix=None): return sym, arg_params, aux_params, input_dim def main(): - parser = argparse.ArgumentParser(description='Caffe prototxt to mxnet model parameter converter.\ - Note that only basic functions are implemented. You are welcomed to contribute to this file.') + parser = argparse.ArgumentParser( + description='Caffe prototxt to mxnet model parameter converter.') parser.add_argument('prototxt', help='The prototxt filename') parser.add_argument('caffemodel', help='The binary caffemodel filename') parser.add_argument('save_model_name', help='The name of the output model prefix') diff --git a/tools/caffe_converter/convert_symbol.py b/tools/caffe_converter/convert_symbol.py index 55808c3c6ad5..501fb1366404 100644 --- a/tools/caffe_converter/convert_symbol.py +++ b/tools/caffe_converter/convert_symbol.py @@ -1,3 +1,5 @@ +"""Convert caffe prototxt to symbol +""" from __future__ import print_function import argparse import re @@ -47,7 +49,8 @@ def _convert_conv_param(param): dilate = 1 if len(param.dilation) == 0 else param.dilation[0] # convert to string except for dilation param_string = "num_filter=%d, pad=(%d,%d), kernel=(%d,%d), stride=(%d,%d), no_bias=%s" % \ - (param.num_output, pad, pad, kernel_size, kernel_size, stride, stride, not param.bias_term) + (param.num_output, pad, pad, kernel_size, kernel_size, + stride, stride, not param.bias_term) # deal with dilation. Won't be in deconvolution if dilate > 1: param_string += ", dilate=(%d, %d)" % (dilate, dilate) @@ -77,122 +80,118 @@ def _parse_proto(prototxt_fname): proto = caffe_parser.read_prototxt(prototxt_fname) # process data layer - input_name, input_dim, layer = _get_input(proto) + input_name, input_dim, layers = _get_input(proto) # only support single input, so always use `data` as the input data mapping = {input_name: 'data'} need_flatten = {input_name: False} - symbol_string = "import mxnet as mx\n" \ - + "data = mx.symbol.Variable(name='data')\n"; + symbol_string = "import mxnet as mx\ndata = mx.symbol.Variable(name='data')\n" - connection = dict() - symbols = dict() - top = dict() flatten_count = 0 output_name = "" prev_name = None # convert reset layers one by one - for i in range(len(layer)): + for i, layer in enumerate(layers): type_string = '' param_string = '' skip_layer = False - name = re.sub('[-/]', '_', layer[i].name) - if layer[i].type == 'Convolution' or layer[i].type == 4: + name = re.sub('[-/]', '_', layer.name) + if layer.type == 'Convolution' or layer.type == 4: type_string = 'mx.symbol.Convolution' - param_string = _convert_conv_param(layer[i].convolution_param) + param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True - if layer[i].type == 'Deconvolution' or layer[i].type == 39: + if layer.type == 'Deconvolution' or layer.type == 39: type_string = 'mx.symbol.Deconvolution' - param_string = _convert_conv_param(layer[i].convolution_param) + param_string = _convert_conv_param(layer.convolution_param) need_flatten[name] = True - if layer[i].type == 'Pooling' or layer[i].type == 17: + if layer.type == 'Pooling' or layer.type == 17: type_string = 'mx.symbol.Pooling' - param_string = _convert_pooling_param(layer[i].pooling_param) + param_string = _convert_pooling_param(layer.pooling_param) need_flatten[name] = True - if layer[i].type == 'ReLU' or layer[i].type == 18: + if layer.type == 'ReLU' or layer.type == 18: type_string = 'mx.symbol.Activation' param_string = "act_type='relu'" - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'TanH' or layer[i].type == 23: + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'TanH' or layer.type == 23: type_string = 'mx.symbol.Activation' param_string = "act_type='tanh'" - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'Sigmoid' or layer[i].type == 19: + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'Sigmoid' or layer.type == 19: type_string = 'mx.symbol.Activation' param_string = "act_type='sigmoid'" - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'LRN' or layer[i].type == 15: + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'LRN' or layer.type == 15: type_string = 'mx.symbol.LRN' - param = layer[i].lrn_param + param = layer.lrn_param param_string = "alpha=%f, beta=%f, knorm=%f, nsize=%d" % ( param.alpha, param.beta, param.k, param.local_size) need_flatten[name] = True - if layer[i].type == 'InnerProduct' or layer[i].type == 14: + if layer.type == 'InnerProduct' or layer.type == 14: type_string = 'mx.symbol.FullyConnected' - param = layer[i].inner_product_param + param = layer.inner_product_param param_string = "num_hidden=%d, no_bias=%s" % ( param.num_output, not param.bias_term) need_flatten[name] = False - if layer[i].type == 'Dropout' or layer[i].type == 6: + if layer.type == 'Dropout' or layer.type == 6: type_string = 'mx.symbol.Dropout' - param = layer[i].dropout_param + param = layer.dropout_param param_string = "p=%f" % param.dropout_ratio - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'Softmax' or layer[i].type == 20: + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'Softmax' or layer.type == 20: type_string = 'mx.symbol.SoftmaxOutput' - if layer[i].type == 'Flatten' or layer[i].type == 8: + if layer.type == 'Flatten' or layer.type == 8: type_string = 'mx.symbol.Flatten' need_flatten[name] = False - if layer[i].type == 'Split' or layer[i].type == 22: + if layer.type == 'Split' or layer.type == 22: type_string = 'split' # will process later - if layer[i].type == 'Concat' or layer[i].type == 3: + if layer.type == 'Concat' or layer.type == 3: type_string = 'mx.symbol.Concat' need_flatten[name] = True - if layer[i].type == 'Crop': + if layer.type == 'Crop': type_string = 'mx.symbol.Crop' need_flatten[name] = True param_string = 'center_crop=True' - if layer[i].type == 'BatchNorm': + if layer.type == 'BatchNorm': type_string = 'mx.symbol.BatchNorm' - param = layer[i].batch_norm_param + param = layer.batch_norm_param # CuDNN requires eps to be greater than 1e-05 # We compensate for this change in convert_model epsilon = param.eps - if(epsilon <= 1e-05): - epsilon = 1e-04 + if (epsilon <= 1e-05): + epsilon = 1e-04 param_string = 'use_global_stats=%s, fix_gamma=False, eps=%f' % ( - param.use_global_stats, epsilon) - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'Scale': - assert layer[i-1].type == 'BatchNorm' - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] + param.use_global_stats, epsilon) + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'Scale': + assert layers[i-1].type == 'BatchNorm' + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] skip_layer = True - prev_name = re.sub('[-/]', '_', layer[i-1].name) - if layer[i].type == 'PReLU': + prev_name = re.sub('[-/]', '_', layers[i-1].name) + if layer.type == 'PReLU': type_string = 'mx.symbol.LeakyReLU' - param = layer[i].prelu_param + param = layer.prelu_param param_string = "act_type='prelu', slope=%f" % param.filler.value - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] - if layer[i].type == 'Eltwise': + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] + if layer.type == 'Eltwise': type_string = 'mx.symbol.broadcast_add' param_string = "" need_flatten[name] = False - if layer[i].type == 'Reshape': + if layer.type == 'Reshape': type_string = 'mx.symbol.Reshape' need_flatten[name] = False - param = layer[i].reshape_param + param = layer.reshape_param param_string = "shape=(%s)" % (','.join(param.shape.dim),) - if layer[i].type == 'AbsVal': - type_string = 'mx.symbol.abs' - need_flatten[name] = need_flatten[mapping[layer[i].bottom[0]]] + if layer.type == 'AbsVal': + type_string = 'mx.symbol.abs' + need_flatten[name] = need_flatten[mapping[layer.bottom[0]]] if skip_layer: - assert len(layer[i].bottom) == 1 + assert len(layer.bottom) == 1 symbol_string += "%s = %s\n" % (name, prev_name) elif type_string == '': - raise ValueError('Unknown layer %s!' % layer[i].type) + raise ValueError('Unknown layer %s!' % layer.type) elif type_string != 'split': - bottom = layer[i].bottom + bottom = layer.bottom if param_string != "": param_string = ", " + param_string if len(bottom) == 1: @@ -209,8 +208,8 @@ def _parse_proto(prototxt_fname): else: symbol_string += "%s = %s(name='%s', *[%s] %s)\n" % ( name, type_string, name, ','.join([mapping[x] for x in bottom]), param_string) - for j in range(len(layer[i].top)): - mapping[layer[i].top[j]] = name + for j in range(len(layer.top)): + mapping[layer.top[j]] = name output_name = name return symbol_string, output_name, input_dim @@ -230,9 +229,9 @@ def convert_symbol(prototxt_fname): Input shape """ sym, output_name, input_dim = _parse_proto(prototxt_fname) - exec(sym) + exec(sym) # pylint: disable=exec-used _locals = locals() - exec("ret = " + output_name, globals(), _locals) + exec("ret = " + output_name, globals(), _locals) # pylint: disable=exec-used ret = _locals['ret'] return ret, input_dim diff --git a/tools/caffe_converter/test_converter.py b/tools/caffe_converter/test_converter.py index 0970ffe95268..128e7c276c50 100644 --- a/tools/caffe_converter/test_converter.py +++ b/tools/caffe_converter/test_converter.py @@ -1,31 +1,32 @@ """Test converted models """ -import os, sys +import os +import sys +import logging +import mxnet as mx +from convert_caffe_modelzoo import convert_caffe_model, get_model_meta_info curr_path = os.path.abspath(os.path.dirname(__file__)) sys.path.append(os.path.join(curr_path, "../../example/image-classification")) - -from test_score import download_data -from score import score -from convert_caffe_modelzoo import convert_caffe_model, get_model_meta_info -import logging +from test_score import download_data # pylint: disable=wrong-import-position +from score import score # pylint: disable=wrong-import-position logging.basicConfig(level=logging.DEBUG) -import mxnet as mx - def test_imagenet_model(model_name, val_data, gpus, batch_size): + """test model on imagenet """ logging.info('test %s', model_name) meta_info = get_model_meta_info(model_name) [model_name, mean] = convert_caffe_model(model_name, meta_info) sym, arg_params, aux_params = mx.model.load_checkpoint(model_name, 0) - acc = [mx.metric.create('acc'), mx.metric.create('top_k_accuracy', top_k = 5)] + acc = [mx.metric.create('acc'), mx.metric.create('top_k_accuracy', top_k=5)] if isinstance(mean, str): mean_args = {'mean_img':mean} else: mean_args = {'rgb_mean':','.join([str(i) for i in mean])} + print(val_data) (speed,) = score(model=(sym, arg_params, aux_params), - data_val=val, - label_name = 'prob_label', + data_val=val_data, + label_name='prob_label', metrics=acc, gpus=gpus, batch_size=batch_size, @@ -37,7 +38,7 @@ def test_imagenet_model(model_name, val_data, gpus, batch_size): assert acc[0].get()[1] > meta_info['top-1-acc'] - 0.3 assert acc[1].get()[1] > meta_info['top-5-acc'] - 0.3 -if __name__ == '__main__': +def main(): gpus = mx.test_utils.list_gpus() assert len(gpus) > 0 batch_size = 32 * len(gpus) @@ -47,3 +48,6 @@ def test_imagenet_model(model_name, val_data, gpus, batch_size): val = download_data() for m in models: test_imagenet_model(m, val, ','.join([str(i) for i in gpus]), batch_size) + +if __name__ == '__main__': + main()