
Add a utility for operator benchmarks #14977

Merged: 30 commits, Jun 12, 2019. The diff below shows the changes from 6 of the 30 commits.

Commits (all by sandeep-krishnamurthy):
- `703e140` Initial end to end working skeleton (May 16, 2019)
- `058f194` Add skeleton for all other operator benchmarks (May 16, 2019)
- `d0df2d7` Add Gluon Conv2D benchmarks (May 16, 2019)
- `8374365` Add readme and user guide, example result (May 17, 2019)
- `0862509` Add licence headers to all files (May 17, 2019)
- `79891ed` fix RAT licence check issues (May 17, 2019)
- `06e5c1c` Add ability to group list of operators with same inputs to benchmark.… (May 20, 2019)
- `6f9865f` Add comparison operator tests and more arithmetic operators (May 20, 2019)
- `91f9c14` Remove Gluon block and instead use only low level NDArray operators (May 20, 2019)
- `0fc26a1` Add GEMM operators (May 20, 2019)
- `a12e29e` Add logical operations (May 21, 2019)
- `7626b1e` Add support to export results as markdown (May 21, 2019)
- `f9b6cd7` Add ability to query MXNet operator registry for operators and run be… (May 23, 2019)
- `976ea47` Delete duplicate arithmetic, logical, comparison operator benchmarks.… (May 24, 2019)
- `e8cc1e8` Add binary elementwise operator benchmarks (May 24, 2019)
- `c2bf1a3` Adding basic logging mechanisms (May 25, 2019)
- `eef953c` Address review comments (Jun 3, 2019)
- `a6d3d98` Few formatting issues resolved (Jun 3, 2019)
- `3b8b2b4` Add unary operators. Remove stale todo files (Jun 3, 2019)
- `180ca72` Fix sanity tests (Jun 3, 2019)
- `f45b39a` Remove mention of hypothesis (Jun 5, 2019)
- `790be23` Add random sampling operator benchmarks. (Jun 7, 2019)
- `60ba9b2` Add all activation operator benchmarks (Jun 7, 2019)
- `0374e76` Add Pooling operator benchmarks (Jun 7, 2019)
- `3752441` Add Convolution operator benchmarks (Jun 8, 2019)
- `bceee47` Add Reduction operator benchmarks (Jun 10, 2019)
- `a63b53c` Add an utility to get list of operator not benchmarked (Jun 10, 2019)
- `afb4803` Autogenerate list of operators to cover (Jun 10, 2019)
- `7cb3ef7` Add basic nn operators - FC, dropout, batchnorm (Jun 11, 2019)
- `5fbc925` Add CPU result file (Jun 11, 2019)
16 changes: 16 additions & 0 deletions benchmark/__init__.py
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
120 changes: 120 additions & 0 deletions benchmark/opperf/README.md
@@ -0,0 +1,120 @@
<!--- Licensed to the Apache Software Foundation (ASF) under one -->
<!--- or more contributor license agreements. See the NOTICE file -->
<!--- distributed with this work for additional information -->
<!--- regarding copyright ownership. The ASF licenses this file -->
<!--- to you under the Apache License, Version 2.0 (the -->
<!--- "License"); you may not use this file except in compliance -->
<!--- with the License. You may obtain a copy of the License at -->

<!--- http://www.apache.org/licenses/LICENSE-2.0 -->

<!--- Unless required by applicable law or agreed to in writing, -->
<!--- software distributed under the License is distributed on an -->
<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
<!--- KIND, either express or implied. See the License for the -->
<!--- specific language governing permissions and limitations -->
<!--- under the License. -->

# MXNet Operator Performance Benchmarks

A Python utility for benchmarking and profiling individual MXNet operator execution.

# How to use

## Prerequisites

This utility uses the MXNet profiler under the hood to fetch compute and memory metrics, so you need to build MXNet with the `USE_PROFILER=1` flag.

Make sure to build the flavor of MXNet on which you want to measure operator performance, for example with or without MKL, or with CUDA 9 or 10.1.

## Use case 1 - Run benchmarks for all the operators

The command below runs benchmarks for all MXNet operators (NDArray and Gluon) with default inputs and saves the final results as JSON in the given file.

```
python incubator-mxnet/benchmark/opperf/opperf.py --output-format json --output-file mxnet_operator_benchmark_results.json
```

**Other options:**

1. **output-format**: `json`, `md` (markdown), or `csv`.

2. **ctx**: Defaults to `cpu` on a CPU machine and `gpu(0)` on a GPU machine. You can override the global context for all operator benchmarks. Example: `--ctx gpu(2)`.

3. **dtype**: Defaults to `float32`. You can override the global dtype for all operator benchmarks. Example: `--dtype float64`.
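
For example, a combined invocation using these options might look like the following (the output file name here is illustrative):

```
python incubator-mxnet/benchmark/opperf/opperf.py --output-format md --output-file mxnet_operator_benchmark_results.md --ctx gpu(0) --dtype float64
```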

## Use case 2 - Run benchmarks for all the operators in a specific category

For example, to run benchmarks for all NDArray arithmetic operators, run the following Python script.

```
#! /usr/bin/python
from benchmark.opperf.tensor_operations.arithmetic_operations import run_arithmetic_operators_benchmarks

# Run all Arithmetic operations benchmarks with default input values
print(run_arithmetic_operators_benchmarks())
```

On a CPU machine, the output of the above benchmark run looks something like this:

```
{'subtract': [{'avg_time_forward_broadcast_sub': 5.5137,
'avg_time_mem_alloc_cpu/0': 207618.0469,
'avg_time_backward_broadcast_sub': 7.2976,
'inputs': {'lhs': (1024, 1024), 'rhs': (1024, 1024)}}
],
'add': [{'avg_time_mem_alloc_cpu/0': 207618.0469,
'avg_time_forward_broadcast_add': 4.309,
'avg_time_backward_broadcast_add': 5.6063,
'inputs': {'lhs': (1024, 1024), 'rhs': (1024, 1024)}},
],
'multiply': [{'avg_time_backward_broadcast_mul': 19.1712,
'avg_time_mem_alloc_cpu/0': 207618.0469,
'avg_time_forward_broadcast_mul': 6.4855,
'inputs': {'lhs': (1024, 1024), 'rhs': (1024, 1024)}},
]
}
```

## Use case 3 - Run benchmarks for a specific operator
For example, to run benchmarks for the `nd.add` operator in MXNet, run the following Python script.

```
#! /usr/bin/python
import mxnet as mx
from mxnet import nd

from benchmark.opperf.utils.benchmark_utils import run_performance_test

add_res = run_performance_test(nd.add, run_backward=True, dtype='float32', ctx=mx.cpu(),
inputs=[{"lhs": (1024, 1024),
"rhs": (1024, 1024)}],
warmup=10, runs=25)
```

On a CPU machine, the output of the above benchmark run looks something like this:

```
{'add': [{'avg_time_mem_alloc_cpu/0': 102760.4453,
'avg_time_forward_broadcast_add': 4.0372,
'avg_time_backward_broadcast_add': 5.3841,
'inputs': {'lhs': (1024, 1024), 'rhs': (1024, 1024)}}]}

```
# How does it work under the hood?

Under the hood, the utility executes the NDArray operator (or Gluon block) with randomly generated data and uses the MXNet profiler to summarize two aspects of each operator's execution:
1. Memory
2. Computation time
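
For illustration, here is a minimal sketch of this flow for a single operator, assuming MXNet was built with `USE_PROFILER=1` as described in the prerequisites (a simplified outline, not the utility's exact implementation):

```
import mxnet as mx
from mxnet import nd

# Enable the profiler and aggregate per-operator statistics.
mx.profiler.set_config(profile_all=True, aggregate_stats=True)

# Randomly generated input data.
lhs = nd.random.uniform(shape=(1024, 1024))
rhs = nd.random.uniform(shape=(1024, 1024))

mx.profiler.set_state('run')
out = nd.add(lhs, rhs)   # operator under test
out.wait_to_read()       # force execution; MXNet operators run asynchronously
mx.profiler.set_state('stop')

# Aggregated summary of compute time and memory per operator.
print(mx.profiler.dumps())
```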

See the design proposal document for more details: https://cwiki.apache.org/confluence/display/MXNET/MXNet+Operator+Benchmarks

# TODO

All contributions are welcome. Below is the list of desired features:

1. Cover all MXNet operators.
2. Enhance MXNet profiler with additional APIs to programmatically fetch and process profiler data.
3. Integration with the CI/CD system to run operator benchmarks on PR builds and nightly builds.
4. Dashboards and other presentations of results to support analysis and planning of tasks such as operator performance improvements.
5. Integration with tools such as [Hypothesis](https://hypothesis.readthedocs.io/en/latest/) for randomized input generation, to profile and identify bottlenecks in operators.
16 changes: 16 additions & 0 deletions benchmark/opperf/__init__.py
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
16 changes: 16 additions & 0 deletions benchmark/opperf/custom_operations/__init__.py
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
67 changes: 67 additions & 0 deletions benchmark/opperf/custom_operations/custom_operations.py
@@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import mxnet as mx

"""
MXNet's Custom Operator Benchmark Tests.
It does a simple element wise addition to make sure computation
is not too much and we can observe custom operator logistics overhead.
"""


# 1. Define a custom operator - element-wise addition of a constant (add one)
class CustomAddOne(mx.operator.CustomOp):
def forward(self, is_train, req, in_data, out_data, aux):
self.assign(out_data[0], req[0], in_data[0] + 1)

def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
self.assign(in_grad[0], req[0], out_grad[0])


@mx.operator.register("CustomAddOne")
class CustomAddOneProp(mx.operator.CustomOpProp):
def __init__(self):
super(CustomAddOneProp, self).__init__(need_top_grad=True)

def list_arguments(self):
return ['in']

def list_outputs(self):
return ['output']

def infer_shape(self, in_shape):
# inputs, outputs, aux
return [in_shape[0]], [in_shape[0]], []

def create_operator(self, ctx, shapes, dtypes):
return CustomAddOne()


"""Helps to benchmark MXNet's Custom Op for Element wise addition on a (1000, 1) tensor.
Performs both forward and backward operation.
This test mainly uncovers core custom op overhead in MXNet.
Benchmark will be done on the following operation:
native_add -> native_add -> native_add -> CUSTOM_ADD -> native_add -> native_add -> native_add
By default run on 'float32' precision.
"""

# TODO
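
# A possible starting point for the TODO above. This is a hypothetical
# sketch, not part of the original PR: it manually times forward and
# backward passes over the native_add/CUSTOM_ADD chain described in the
# docstring; `run_custom_add_benchmark` is an assumed name.
import time


def run_custom_add_benchmark(ctx=mx.cpu(), dtype='float32', warmup=10, runs=25):
    data = mx.nd.random.uniform(shape=(1000, 1), dtype=dtype, ctx=ctx)
    data.attach_grad()

    def step():
        # native_add x3 -> CUSTOM_ADD -> native_add x3, forward and backward.
        with mx.autograd.record():
            x = data + 1
            x = x + 1
            x = x + 1
            x = mx.nd.Custom(x, op_type="CustomAddOne")
            x = x + 1
            x = x + 1
            out = x + 1
        out.backward()

    for _ in range(warmup):   # warmup runs, excluded from timing
        step()
    mx.nd.waitall()

    start = time.time()
    for _ in range(runs):
        step()
    mx.nd.waitall()           # drain MXNet's async engine before stopping the clock
    avg_ms = (time.time() - start) * 1000.0 / runs
    return {'CustomAddOne': [{'avg_time_forward_backward': avg_ms,
                              'inputs': {'data': (1000, 1)}}]}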
16 changes: 16 additions & 0 deletions benchmark/opperf/nn_operations/__init__.py
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
31 changes: 31 additions & 0 deletions benchmark/opperf/nn_operations/activation_operations.py
@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

""" Performance benchmark Tests for MXNet Gluon Activation Layers.

TODO

1. LeakyRelu
2. PRelu
3. Activation (Sigmoid)
4. Activation (Softmax) (Note - GLUON does not have Softmax block, using NDArray APIs for now)
5. Activation (Log_Softmax) (Note - GLUON does not have Log_Softmax block, using NDArray APIs for now)
6. Activation (tanh)
7. Elu
8. Selu
9. Swish
"""
27 changes: 27 additions & 0 deletions benchmark/opperf/nn_operations/basic_operations.py
@@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

""" Performance benchmark tests for MXNet Gluon Basic NN Layers

TODO

1. Dense
2. Lambda
3. Flatten
4. Embedding

"""
66 changes: 66 additions & 0 deletions benchmark/opperf/nn_operations/convolution_operations.py
@@ -0,0 +1,66 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import mxnet as mx

from mxnet.gluon import nn

from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list

""" Performance benchmark tests for MXNet Gluon Convolution Layers

1. Conv2D

TODO

2. Conv1D
3. Conv1DTranspose
4. Conv2DTranspose

NOTE: The number of warmup and benchmark runs for convolution may need to be reduced, as the computation
is heavy and results stabilize without variation within the first 25 runs.
"""


def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=25):
"""Runs benchmarks with the given context and precision (dtype)for all convolution Gluon blocks
in MXNet.

:param ctx: Context to run benchmarks
:param dtype: Precision to use for benchmarks
:param warmup: Number of times to run for warmup
:param runs: Number of runs to capture benchmark results
:return: Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

"""

# Benchmark Gluon Conv2D Block.
conv2d_res = run_performance_test(nn.Conv2D, run_backward=True, dtype=dtype, ctx=ctx,
inputs=[{"data": (32, 3, 256, 256),
"channels": 64,
"kernel_size": (3, 3),
"strides": (1, 1),
"padding": (0, 0),
"dilation": (1, 1),
"layout": "NCHW",
"activation": None, }],
warmup=warmup, runs=runs)

# Prepare combined results for Gluon Convolution operators
mx_gluon_conv_op_results = merge_map_list([conv2d_res])
return mx_gluon_conv_op_results
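
# Example usage (hypothetical, not part of the original PR):
#
#   results = run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32')
#   print(results)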