This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[Opperf] Make module/namespace of the operator parameterized (#15226)
* Make module/namespace parameterized to choose between mx.nd or mx.np

* Fix comments

* Add automated way to fetch compile/runtime flags for MXNet

* Fix warmup and runs count

* Fix Pooling operator benchmarks
sandeep-krishnamurthy committed Jun 28, 2019
1 parent 92fce90 commit e8f3e91
Showing 14 changed files with 421 additions and 65 deletions.
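The common thread across these files: each benchmark now resolves its operator by name from a configurable module, MX_OP_MODULE (imported from benchmark/opperf/rules/default_params.py), instead of hard-coding mx.nd. A minimal sketch of the pattern, assuming MX_OP_MODULE is a plain alias for one of the two namespaces (the default_params.py diff itself is not among the files shown here):

    import mxnet as mx

    # Assumption: MX_OP_MODULE aliases a namespace module; point it at mx.np
    # instead to benchmark the NumPy-compatible operators.
    MX_OP_MODULE = mx.nd

    # Operators are looked up by name at call time, so benchmark code never
    # references mx.nd.dot (or mx.np.dot) directly.
    dot_op = getattr(MX_OP_MODULE, "dot")
    out = dot_op(mx.nd.ones((2, 3)), mx.nd.ones((3, 2)))
    print(out.shape)  # (2, 2)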
5 changes: 3 additions & 2 deletions benchmark/opperf/README.md
@@ -24,10 +24,11 @@ With this utility, for each MXNet operator you can get the following details:
**Timing**
1. Forward execution time
2. Backward execution time
3. Time spent for memory management

**Memory**
1. Total memory allocated
1. Average and Max memory allocated

NOTE: This is the `pool memory`. It does not reflect the exact memory requested by the operator.

# Motivation

8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/binary_operators.py
@@ -38,7 +38,7 @@
get_all_elemen_wise_binary_operators


def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
broadcast operators in MXNet.
@@ -48,9 +48,9 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
@@ -65,7 +65,7 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
return mx_binary_op_results


def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
element_wise operators in MXNet.
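From the caller's side nothing changes besides the defaults; a usage sketch (the result is a merged map of per-operator metrics via merge_map_list, as in the other modules; its exact layout is an assumption here):

    import mxnet as mx
    from benchmark.opperf.nd_operations.binary_operators import \
        run_mx_binary_broadcast_operators_benchmarks

    # warmup/runs remain overridable; 25 and 100 are only the new defaults.
    results = run_mx_binary_broadcast_operators_benchmarks(
        ctx=mx.cpu(), dtype='float32', warmup=25, runs=100)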
13 changes: 6 additions & 7 deletions benchmark/opperf/nd_operations/gemm_operators.py
@@ -16,10 +16,9 @@
# under the License.

import mxnet as mx
from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list

from benchmark.opperf.rules.default_params import MX_OP_MODULE
"""Performance benchmark tests for MXNet NDArray GEMM Operators.
1. dot
@@ -35,7 +34,7 @@
"""


def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the GEMM
operators (dot, batch_dot) in MXNet.
@@ -45,9 +44,9 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
@@ -57,7 +56,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
"""
# Benchmark tests for dot and batch_dot operators
dot_benchmark_res = run_performance_test(
[nd.dot], run_backward=True,
[getattr(MX_OP_MODULE, "dot")], run_backward=True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (1024, 1024),
"rhs": (1024, 1024)},
@@ -71,7 +70,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
warmup=warmup, runs=runs)

batch_dot_benchmark_res = run_performance_test(
[nd.batch_dot], run_backward=True,
[getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (32, 1024, 1024),
"rhs": (32, 1024, 1024)},
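The getattr substitution is behavior-preserving as long as MX_OP_MODULE resolves to the NDArray namespace; a quick sanity-check sketch (the equivalence, not the benchmark harness, is the point here):

    import mxnet as mx
    from benchmark.opperf.rules.default_params import MX_OP_MODULE

    op = getattr(MX_OP_MODULE, "dot")
    a, b = mx.nd.ones((4, 5)), mx.nd.ones((5, 6))
    # Assumption: the default MX_OP_MODULE targets mx.nd, so the resolved
    # operator must agree with mx.nd.dot on the same inputs.
    assert (op(a, b) == mx.nd.dot(a, b)).asnumpy().all()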
15 changes: 8 additions & 7 deletions benchmark/opperf/nd_operations/nn_activation_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray Activation Operators.
@@ -35,7 +35,7 @@
"""


def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the activation
operators (relu, sigmoid, softmax) in MXNet.
@@ -45,9 +45,9 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
@@ -56,7 +56,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
"""
# Relu and its variation
relu_benchmark_res = run_performance_test([nd.LeakyReLU],
relu_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "LeakyReLU")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -78,7 +78,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10

# Sigmoid => Covered as part of Unary ops
# Hard_Sigmoid
hard_sigmoid_benchmark_res = run_performance_test([nd.hard_sigmoid],
hard_sigmoid_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "hard_sigmoid")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -90,7 +90,8 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
runs=runs)

# Softmax, LogSoftmax
softmax_benchmark_res = run_performance_test([nd.softmax, nd.log_softmax],
softmax_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "softmax"),
getattr(MX_OP_MODULE, "log_softmax")],
run_backward=True,
dtype=dtype,
ctx=ctx,
10 changes: 5 additions & 5 deletions benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray basic NN Operators.
@@ -29,9 +29,9 @@
"""


def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
# FullyConnected operator benchmarks
fc_benchmark_res = run_performance_test([nd.FullyConnected],
fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -49,7 +49,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
runs=runs)

# Dropout benchmarks
dropout_benchmark_res = run_performance_test([nd.Dropout],
dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -62,7 +62,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
warmup=warmup,
runs=runs)
# BatchNorm benchmarks
batchnorm_benchmark_res = run_performance_test([nd.BatchNorm],
batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
run_backward=True,
dtype=dtype,
ctx=ctx,
20 changes: 9 additions & 11 deletions benchmark/opperf/nd_operations/nn_conv_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray Convolution and Pooling Operators.
@@ -51,7 +51,7 @@
"""


def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
pool_types = ['avg', 'max', 'sum']
global_pool_types = [0, 1]

@@ -61,7 +61,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
for pool_type in pool_types:
for global_pool in global_pool_types:
for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
pool1d_benchmark_res += run_performance_test([nd.Pooling],
pool1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -70,13 +70,12 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
"pool_type": pool_type,
"global_pool": global_pool,
"stride": 1,
"pad": 1,
"layout": 'NCW'}
"pad": 1}
],
warmup=warmup,
runs=runs)
for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
pool2d_benchmark_res += run_performance_test([nd.Pooling],
pool2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -85,8 +84,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
"pool_type": pool_type,
"global_pool": global_pool,
"stride": (1, 1),
"pad": (0, 0),
"layout": 'NCHW'}
"pad": (0, 0)}
],
warmup=warmup,
runs=runs)
@@ -95,11 +93,11 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
return mx_pooling_op_results


def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
# Conv1D Benchmarks
conv1d_benchmark_res = []
for conv_data in [(32, 3, 256), (32, 3, 64)]:
conv1d_benchmark_res += run_performance_test([nd.Convolution],
conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -118,7 +116,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=1
# Conv2D Benchmarks
conv2d_benchmark_res = []
for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
conv2d_benchmark_res += run_performance_test([nd.Convolution],
conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
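The "Fix Pooling operator benchmarks" item in the commit message appears to correspond to the removal of the explicit layout argument above: Pooling defaults to NCW for 3-D and NCHW for 4-D inputs, so passing it was redundant. A hedged sketch of the direct 2-D call with the parameter set the benchmark now uses (the kernel value is elided in this diff and assumed here):

    import mxnet as mx

    data = mx.nd.random.uniform(shape=(32, 3, 64, 64))
    # No layout argument: a 4-D input is treated as NCHW by default.
    out = mx.nd.Pooling(data, kernel=(3, 3), pool_type='max',
                        global_pool=0, stride=(1, 1), pad=(0, 0))
    print(out.shape)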
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/random_sampling_operators.py
@@ -34,7 +34,7 @@
from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators


def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the random sampling
operators in MXNet.
@@ -44,9 +44,9 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/reduction_operators.py
@@ -31,7 +31,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the reduction
operators in MXNet.
@@ -41,9 +41,9 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/unary_operators.py
@@ -35,7 +35,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the unary
operators in MXNet.
@@ -45,9 +45,9 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
warmup: int, default 10
warmup: int, default 25
Number of times to run for warmup
runs: int, default 50
runs: int, default 100
Number of runs to capture benchmark results
Returns
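The registry-driven suites (unary, reduction, random sampling, binary) share one elided body shape: pull the operator list from the registry, then benchmark it in bulk. A sketch under that assumption (helper names come from the imports shown in these diffs; exact signatures are inferred):

    import mxnet as mx
    from benchmark.opperf.utils.op_registry_utils import get_all_unary_operators
    from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks

    # Fetch every registered unary operator, then run the whole set through
    # the shared harness rather than one run_performance_test call per op.
    mx_unary_ops = get_all_unary_operators()
    results = run_op_benchmarks(mx_unary_ops, 'float32', mx.cpu(), 25, 100)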
17 changes: 11 additions & 6 deletions benchmark/opperf/opperf.py
@@ -40,7 +40,8 @@
from benchmark.opperf.nd_operations.nn_basic_operators import run_nn_basic_operators_benchmarks

from benchmark.opperf.utils.common_utils import merge_map_list, save_to_file
from benchmark.opperf.utils.op_registry_utils import get_operators_with_no_benchmark
from benchmark.opperf.utils.op_registry_utils import get_operators_with_no_benchmark,\
get_current_runtime_features


def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
@@ -102,17 +103,18 @@ def _parse_mxnet_context(ctx):
device_id = int(ctx[4:-1])
return mx.gpu(device_id)


def main():
# 1. GET USER INPUTS
parser = argparse.ArgumentParser(
description='Run all the MXNet operators (NDArray) benchmarks')
parser = argparse.ArgumentParser(description='Run all the MXNet operator benchmarks')

parser.add_argument('--ctx', type=str, default='cpu',
help='Global context to run all benchmarks. By default, cpu on a '
'CPU machine, gpu(0) on a GPU machine. '
'Valid Inputs - cpu, gpu, gpu(0), gpu(1)...')
parser.add_argument('--dtype', type=str, default='float32', help='DType (Precision) to run benchmarks. By default, '
'float32. Valid Inputs - float32, float64.')
'float32. Valid Inputs - float32, float64, int32, '
'int64')
parser.add_argument('-f', '--output-format', type=str, default='json',
choices=['json', 'md'],
help='Benchmark result output format. By default, json. '
@@ -129,17 +131,20 @@ def main():
# 2. RUN BENCHMARKS
ctx = _parse_mxnet_context(args.ctx)
dtype = args.dtype
final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=args.dtype)
final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype)

# 3. PREPARE OUTPUTS
save_to_file(final_benchmark_results, args.output_file, args.output_format)
run_time_features = get_current_runtime_features()
save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)

# 4. Generate list of MXNet operators not covered in benchmarks
ops_not_covered = get_operators_with_no_benchmark(final_benchmark_results.keys())
for idx, op in enumerate(ops_not_covered):
print(f"{idx}. {op}")

return 0


if __name__ == '__main__':
sys.exit(main())
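get_current_runtime_features, newly imported in opperf.py, implements the "automated way to fetch compile/runtime flags" bullet; its definition is not among the loaded diffs. A plausible sketch built on MXNet's public runtime API (mx.runtime.Features ships with MXNet 1.5; the returned dict shape is an assumption):

    import mxnet as mx

    # Assumed sketch: map every known compile-time feature flag (CUDA,
    # MKLDNN, OPENMP, ...) to whether this MXNet binary enables it.
    def get_current_runtime_features():
        features = mx.runtime.Features()
        return {name: features.is_enabled(name) for name in features.keys()}

    print(get_current_runtime_features())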
