apache · apeforest · Mar 10, 2020 · Jan 30, 2020 · Jan 30, 2020 · Jan 30, 2020
diff --git a/benchmark/opperf/README.md b/benchmark/opperf/README.md
@@ -50,7 +50,8 @@ Hence, in this utility, we will build the functionality to allow users and devel
 Provided you have MXNet installed (any version >= 1.5.1), all you need to use opperf utility is to add path to your cloned MXNet repository to the PYTHONPATH.
 
 Note: 
-To install MXNet, refer [Installing MXNet page](https://mxnet.apache.org/versions/master/install/index.html)
+1. Currently, the opperf utility requires a cloned MXNet repository. It is not yet supported with the PyPI binary distribution. [Work in Progress]
+2. To install MXNet, refer [Installing MXNet page](https://mxnet.apache.org/versions/master/install/index.html)
 
 ```
 export PYTHONPATH=$PYTHONPATH:/path/to/incubator-mxnet/
@@ -72,6 +73,8 @@ python incubator-mxnet/benchmark/opperf/opperf.py --output-format json --output-
 
 3. **dtype** : By default, `float32`. You can override and set the global dtype for all operator benchmarks. Example: --dtype float64.
 
+4. **profiler** : By default, `native`. You can override and set the global profiler for all operator benchmarks. Example: --profiler python.
+
 ## Usecase 2 - Run benchmarks for all the operators in a specific category
 
 For example, you want to run benchmarks for all NDArray Broadcast Binary Operators, Ex: broadcast_add, broadcast_mod, broadcast_pow etc., You just run the following python script.
@@ -117,6 +120,7 @@ add_res = run_performance_test(nd.add, run_backward=True, dtype='float32', ctx=m
                                inputs=[{"lhs": (1024, 1024),
                                         "rhs": (1024, 1024)}],
                                warmup=10, runs=25)
+print(add_res)
 ```
 
 Output for the above benchmark run, on a CPU machine, would look something like below:
@@ -143,6 +147,7 @@ add_res = run_performance_test([nd.add, nd.subtract], run_backward=True, dtype='
                                inputs=[{"lhs": (1024, 1024),
                                         "rhs": (1024, 1024)}],
                                warmup=10, runs=25)
+print(add_res)
 ```
 
 Output for the above benchmark run, on a CPU machine, would look something like below:

diff --git a/benchmark/opperf/nd_operations/array_manipulation_operators.py b/benchmark/opperf/nd_operations/array_manipulation_operators.py
@@ -0,0 +1,254 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import mxnet as mx
+
+from mxnet import nd
+from benchmark.opperf.utils.benchmark_utils import run_performance_test
+from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE
+
+from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
+from benchmark.opperf.utils.op_registry_utils import get_all_rearrange_operators, \
+    get_all_shape_operators, get_all_expanding_operators, get_all_rounding_operators
+
+"""Performance benchmark tests for MXNet Array Manipulation Operators.
+
+Array Rearrange Operators
+1. transpose
+2. swapaxes (alias SwapAxis)
+3. flip (alias reverse)
+4. depth_to_space
+5. space_to_depth
+
+Array Shape Manipulation Operators
+1. split (alias SliceChannel)
+2. diag
+3. reshape
+4. reshape_like
+5. size_array
+6. shape_array
+
+Array Expanding Operators
+1. broadcast_axes (alias broadcast_axis)
+2. broadcast_to
+3. broadcast_like
+4. repeat
+5. tile
+6. pad
+7. expand_dims
+
+
+Array Rounding Operators
+1. round
+2. rint
+3. fix
+4. floor
+5. ceil
+6. trunc
+
+Array Join & Split Operators
+1. concat
+2. split
+3. stack
+
+"""
+
+
+def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
+    """Benchmark all the array rearrange operators in MXNet on the given
+    context and precision (dtype).
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Look up the rearrange operators, then hand them to run_op_benchmarks, which
+    # takes its settings positionally as (dtype, ctx, profiler, warmup, runs).
+    rearrange_ops = get_all_rearrange_operators()
+    benchmark_args = (dtype, ctx, profiler, warmup, runs)
+
+    return run_op_benchmarks(rearrange_ops, *benchmark_args)
+
+
+def run_shape_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
+    """Benchmark all the array shape operators in MXNet on the given
+    context and precision (dtype).
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Look up the shape operators, then hand them to run_op_benchmarks, which
+    # takes its settings positionally as (dtype, ctx, profiler, warmup, runs).
+    shape_ops = get_all_shape_operators()
+    benchmark_args = (dtype, ctx, profiler, warmup, runs)
+
+    return run_op_benchmarks(shape_ops, *benchmark_args)
+
+
+def run_expanding_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
+    """Benchmark all the array expanding operators in MXNet on the given
+    context and precision (dtype).
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Look up the expanding operators, then hand them to run_op_benchmarks, which
+    # takes its settings positionally as (dtype, ctx, profiler, warmup, runs).
+    expanding_ops = get_all_expanding_operators()
+    benchmark_args = (dtype, ctx, profiler, warmup, runs)
+
+    return run_op_benchmarks(expanding_ops, *benchmark_args)
+
+
+def run_rounding_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
+    """Benchmark all the array rounding operators in MXNet on the given
+    context and precision (dtype).
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Look up the rounding operators, then hand them to run_op_benchmarks, which
+    # takes its settings positionally as (dtype, ctx, profiler, warmup, runs).
+    rounding_ops = get_all_rounding_operators()
+    benchmark_args = (dtype, ctx, profiler, warmup, runs)
+
+    return run_op_benchmarks(rounding_ops, *benchmark_args)
+
+
+def run_join_split_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
+    """Benchmark the join & split operators (concat, split, stack) in MXNet
+    on the given context and precision (dtype).
+
+    Parameters
+    ----------
+    ctx: mx.ctx, default mx.cpu()
+        Context to run benchmarks
+    dtype: str, default 'float32'
+        Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    warmup: int, default 25
+        Number of times to run for warmup
+    runs: int, default 100
+        Number of runs to capture benchmark results
+
+    Returns
+    -------
+    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
+
+    """
+    # Settings shared by all three run_performance_test calls below. The backward
+    # pass is switched off (run_backward=False) for concat, split and stack alike.
+    shared_kwargs = dict(run_backward=False,
+                         dtype=dtype,
+                         ctx=ctx,
+                         profiler=profiler,
+                         warmup=warmup,
+                         runs=runs)
+
+    def _op(op_name):
+        # Resolve the operator by name on MX_OP_MODULE, wrapped in a list.
+        return [getattr(MX_OP_MODULE, op_name)]
+
+    def _random_triplet():
+        # One input set holding three new nd.random_normal arrays of shape (100, 100).
+        # NOTE(review): these arrays are created without ctx/dtype - confirm that
+        # run_performance_test moves/casts NDArray inputs to the requested ones.
+        return [{arg: nd.random_normal(shape=(100,100))
+                 for arg in ("args0", "args1", "args2")}]
+
+    # concat
+    concat_res = run_performance_test(_op("concat"),
+                                      inputs=_random_triplet(),
+                                      **shared_kwargs)
+
+    # split
+    split_inputs = [{"data": (1024, 1024), "num_outputs": 2},
+                    {"data": (10000, 1), "num_outputs": 1},
+                    {"data": (10000, 100), "num_outputs": 10}]
+    split_res = run_performance_test(_op("split"),
+                                     inputs=split_inputs,
+                                     **shared_kwargs)
+
+    # stack
+    stack_res = run_performance_test(_op("stack"),
+                                     inputs=_random_triplet(),
+                                     **shared_kwargs)
+
+    # Concatenate the three results with `+` (concat, split, stack order) and
+    # let merge_map_list combine them into the returned value.
+    return merge_map_list(concat_res + split_res + stack_res)
diff --git a/benchmark/opperf/nd_operations/array_rearrange.py b/benchmark/opperf/nd_operations/array_rearrange.py
diff --git a/benchmark/opperf/opperf.py b/benchmark/opperf/opperf.py
@@ -40,11 +40,13 @@
     run_convolution_operators_benchmarks, run_transpose_convolution_operators_benchmarks
 from benchmark.opperf.nd_operations.nn_basic_operators import run_nn_basic_operators_benchmarks
 from benchmark.opperf.nd_operations.nn_optimizer_operators import run_optimizer_operators_benchmarks
-from benchmark.opperf.nd_operations.array_rearrange import run_rearrange_operators_benchmarks
 from benchmark.opperf.nd_operations.indexing_routines import run_indexing_routines_benchmarks
 from benchmark.opperf.nd_operations.nn_loss_operators import run_loss_operators_benchmarks
 from benchmark.opperf.nd_operations.linalg_operators import run_linalg_operators_benchmarks
 from benchmark.opperf.nd_operations.misc_operators import run_mx_misc_operators_benchmarks
+from benchmark.opperf.nd_operations.array_manipulation_operators import run_rearrange_operators_benchmarks, \
+    run_shape_operators_benchmarks, run_expanding_operators_benchmarks, run_rounding_operators_benchmarks, \
+    run_join_split_operators_benchmarks
 
 from benchmark.opperf.utils.common_utils import merge_map_list, save_to_file
 from benchmark.opperf.utils.op_registry_utils import get_operators_with_no_benchmark, \
@@ -87,11 +89,23 @@ def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='n
     # Run all Sorting and Searching operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_sorting_searching_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
+    # Run all Indexing routines benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
     # Run all Array Rearrange operations benchmarks with default input values
     mxnet_operator_benchmark_results.append(run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
-    # Run all Indexing routines benchmarks with default input values
-    mxnet_operator_benchmark_results.append(run_indexing_routines_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+    # Run all Array Shape Manipulation operations benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_shape_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
+    # Run all Array Expansion operations benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_expanding_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
+    # Run all Array Rounding operations benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_rounding_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
+
+    # Run all Array Join & Split operations benchmarks with default input values
+    mxnet_operator_benchmark_results.append(run_join_split_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler, warmup=warmup, runs=runs))
 
     # ************************ MXNET NN OPERATOR BENCHMARKS ****************************