This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[Opperf] Make module/namespace of the operator parameterized #15226

Merged
5 changes: 3 additions & 2 deletions benchmark/opperf/README.md
@@ -24,10 +24,11 @@ With this utility, for each MXNet operator you can get the following details:
**Timing**
1. Forward execution time
2. Backward execution time
3. Time spent for memory management

**Memory**
-1. Total memory allocated
+1. Average and Max memory allocated

+NOTE: This is the `pool memory`. It does not reflect the exact memory requested by the operator.

# Motivation

8 changes: 4 additions & 4 deletions benchmark/opperf/nd_operations/binary_operators.py
@@ -38,7 +38,7 @@
get_all_elemen_wise_binary_operators


-def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
broadcast operators in MXNet.

@@ -48,9 +48,9 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
@@ -65,7 +65,7 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
return mx_binary_op_results


-def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the binary
element_wise operators in MXNet.

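As a usage sketch (not part of the diff): the import path and signature below come from the file above; the reduced warmup/runs values are illustrative overrides of the new defaults.

```python
import mxnet as mx
from benchmark.opperf.nd_operations.binary_operators import \
    run_mx_binary_broadcast_operators_benchmarks

# warmup=25 and runs=100 are now the defaults; pass smaller values
# explicitly for a quick smoke run.
results = run_mx_binary_broadcast_operators_benchmarks(
    ctx=mx.cpu(), dtype='float32', warmup=5, runs=10)
```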
13 changes: 6 additions & 7 deletions benchmark/opperf/nd_operations/gemm_operators.py
@@ -16,10 +16,9 @@
# under the License.

import mxnet as mx
-from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list

+from benchmark.opperf.rules.default_params import MX_OP_MODULE
"""Performance benchmark tests for MXNet NDArray GEMM Operators.

1. dot
@@ -35,7 +34,7 @@
"""


-def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the GEMM
operators (dot, batch_dot) in MXNet.

@@ -45,9 +44,9 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
@@ -57,7 +56,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
"""
# Benchmark tests for dot and batch_dot operators
dot_benchmark_res = run_performance_test(
-[nd.dot], run_backward=True,
+[getattr(MX_OP_MODULE, "dot")], run_backward=True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (1024, 1024),
"rhs": (1024, 1024)},
@@ -71,7 +70,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs
warmup=warmup, runs=runs)

batch_dot_benchmark_res = run_performance_test(
-[nd.batch_dot], run_backward=True,
+[getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
dtype=dtype, ctx=ctx,
inputs=[{"lhs": (32, 1024, 1024),
"rhs": (32, 1024, 1024)},
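This hunk shows the pattern the PR applies everywhere: operators are looked up by name from a single module constant instead of being referenced as nd.&lt;op&gt;. A minimal sketch of the idea, assuming MX_OP_MODULE resolves to MXNet's ndarray operator namespace (its actual binding lives in benchmark/opperf/rules/default_params.py):

```python
import mxnet as mx

# Illustrative assumption: default_params.py binds MX_OP_MODULE to the
# ndarray namespace. Re-pointing this single constant re-targets every
# benchmark without editing the individual benchmark files.
MX_OP_MODULE = mx.nd

dot_op = getattr(MX_OP_MODULE, "dot")  # same callable as mx.nd.dot
out = dot_op(mx.nd.ones((2, 3)), mx.nd.ones((3, 2)))
print(out.shape)  # (2, 2)
```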
15 changes: 8 additions & 7 deletions benchmark/opperf/nd_operations/nn_activation_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
-from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray Activation Operators.

@@ -35,7 +35,7 @@
"""


-def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the activation
operators (relu, sigmoid, softmax) in MXNet.

@@ -45,9 +45,9 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
@@ -56,7 +56,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10

"""
# Relu and its variation
-relu_benchmark_res = run_performance_test([nd.LeakyReLU],
+relu_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "LeakyReLU")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -78,7 +78,7 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10

# Sigmoid => Covered as part of Unary ops
# Hard_Sigmoid
-hard_sigmoid_benchmark_res = run_performance_test([nd.hard_sigmoid],
+hard_sigmoid_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "hard_sigmoid")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -90,7 +90,8 @@ def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10
runs=runs)

# Softmax, LogSoftmax
-softmax_benchmark_res = run_performance_test([nd.softmax, nd.log_softmax],
+softmax_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "softmax"),
+getattr(MX_OP_MODULE, "log_softmax")],
run_backward=True,
dtype=dtype,
ctx=ctx,
10 changes: 5 additions & 5 deletions benchmark/opperf/nd_operations/nn_basic_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
-from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray basic NN Operators.

@@ -29,9 +29,9 @@
"""


-def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
# FullyConnnected operator benchmarks
-fc_benchmark_res = run_performance_test([nd.FullyConnected],
+fc_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "FullyConnected")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -49,7 +49,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
runs=runs)

# Dropout benchmarks
-dropout_benchmark_res = run_performance_test([nd.Dropout],
+dropout_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "Dropout")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -62,7 +62,7 @@ def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
warmup=warmup,
runs=runs)
# BatchNorm benchmarks
-batchnorm_benchmark_res = run_performance_test([nd.BatchNorm],
+batchnorm_benchmark_res = run_performance_test([getattr(MX_OP_MODULE, "BatchNorm")],
run_backward=True,
dtype=dtype,
ctx=ctx,
20 changes: 9 additions & 11 deletions benchmark/opperf/nd_operations/nn_conv_operators.py
@@ -16,9 +16,9 @@
# under the License.

import mxnet as mx
-from mxnet import nd
from benchmark.opperf.utils.benchmark_utils import run_performance_test
from benchmark.opperf.utils.common_utils import merge_map_list
+from benchmark.opperf.rules.default_params import MX_OP_MODULE

"""Performance benchmark tests for MXNet NDArray Convolution and Pooling Operators.

@@ -51,7 +51,7 @@
"""


-def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
pool_types = ['avg', 'max', 'sum']
global_pool_types = [0, 1]

@@ -61,7 +61,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
for pool_type in pool_types:
for global_pool in global_pool_types:
for pool1d_data in [(32, 3, 256), (32, 3, 64)]:
-pool1d_benchmark_res += run_performance_test([nd.Pooling],
+pool1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -70,13 +70,12 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
"pool_type": pool_type,
"global_pool": global_pool,
"stride": 1,
"pad": 1,
"layout": 'NCW'}
"pad": 1}
],
warmup=warmup,
runs=runs)
for pool2d_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
-pool2d_benchmark_res += run_performance_test([nd.Pooling],
+pool2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Pooling")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -85,8 +84,7 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
"pool_type": pool_type,
"global_pool": global_pool,
"stride": (1, 1),
"pad": (0, 0),
"layout": 'NCHW'}
"pad": (0, 0)}
],
warmup=warmup,
runs=runs)
@@ -95,11 +93,11 @@ def run_pooling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, r
return mx_pooling_op_results


-def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
# Conv1D Benchmarks
conv1d_benchmark_res = []
for conv_data in [(32, 3, 256), (32, 3, 64)]:
-conv1d_benchmark_res += run_performance_test([nd.Convolution],
+conv1d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
@@ -118,7 +116,7 @@ def run_convolution_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=1
# Conv2D Benchmarks
conv2d_benchmark_res = []
for conv_data in [(32, 3, 256, 256), (32, 3, 64, 64)]:
-conv2d_benchmark_res += run_performance_test([nd.Convolution],
+conv2d_benchmark_res += run_performance_test([getattr(MX_OP_MODULE, "Convolution")],
run_backward=True,
dtype=dtype,
ctx=ctx,
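Alongside the getattr change, the pooling hunks above drop the explicit "layout" kwarg from the benchmark inputs. A minimal sketch of the equivalent direct call, assuming Pooling infers the layout from the input rank (NCW for the 3-D case; the shapes are those used in the benchmark):

```python
import mxnet as mx

# 1-D average pooling over a (batch, channel, width) input, with no
# "layout" kwarg, mirroring the updated benchmark inputs above.
data = mx.nd.random.uniform(shape=(32, 3, 256))
out = mx.nd.Pooling(data, kernel=(3,), pool_type='avg',
                    global_pool=0, stride=(1,), pad=(1,))
print(out.shape)
```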
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/random_sampling_operators.py
@@ -34,7 +34,7 @@
from benchmark.opperf.utils.op_registry_utils import get_all_random_sampling_operators


-def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the random sampling
operators in MXNet.

@@ -44,9 +44,9 @@ def run_mx_random_sampling_operators_benchmarks(ctx=mx.cpu(), dtype='float32', w
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/reduction_operators.py
@@ -31,7 +31,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


-def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the reduction
operators in MXNet.

@@ -41,9 +41,9 @@ def run_mx_reduction_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
6 changes: 3 additions & 3 deletions benchmark/opperf/nd_operations/unary_operators.py
@@ -35,7 +35,7 @@
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks


-def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10, runs=50):
+def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
"""Runs benchmarks with the given context and precision (dtype)for all the unary
operators in MXNet.

@@ -45,9 +45,9 @@ def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=10,
Context to run benchmarks
dtype: str, default 'float32'
Precision to use for benchmarks
-warmup: int, default 10
+warmup: int, default 25
Number of times to run for warmup
-runs: int, default 50
+runs: int, default 100
Number of runs to capture benchmark results

Returns
17 changes: 11 additions & 6 deletions benchmark/opperf/opperf.py
@@ -40,7 +40,8 @@
from benchmark.opperf.nd_operations.nn_basic_operators import run_nn_basic_operators_benchmarks

from benchmark.opperf.utils.common_utils import merge_map_list, save_to_file
-from benchmark.opperf.utils.op_registry_utils import get_operators_with_no_benchmark
+from benchmark.opperf.utils.op_registry_utils import get_operators_with_no_benchmark,\
+get_current_runtime_features


def run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32'):
@@ -102,17 +103,18 @@ def _parse_mxnet_context(ctx):
device_id = int(ctx[4:-1])
return mx.gpu(device_id)


def main():
# 1. GET USER INPUTS
-parser = argparse.ArgumentParser(
-description='Run all the MXNet operators (NDArray) benchmarks')
+parser = argparse.ArgumentParser(description='Run all the MXNet operator benchmarks')

parser.add_argument('--ctx', type=str, default='cpu',
help='Global context to run all benchmarks. By default, cpu on a '
'CPU machine, gpu(0) on a GPU machine. '
'Valid Inputs - cpu, gpu, gpu(0), gpu(1)...')
parser.add_argument('--dtype', type=str, default='float32', help='DType (Precision) to run benchmarks. By default, '
-'float32. Valid Inputs - float32, float64.')
+'float32. Valid Inputs - float32, float64, int32, '
+'int64')
parser.add_argument('-f', '--output-format', type=str, default='json',
choices=['json', 'md'],
help='Benchmark result output format. By default, json. '
@@ -129,17 +131,20 @@ def main():
# 2. RUN BENCHMARKS
ctx = _parse_mxnet_context(args.ctx)
dtype = args.dtype
-final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=args.dtype)
+final_benchmark_results = run_all_mxnet_operator_benchmarks(ctx=ctx, dtype=dtype)

# 3. PREPARE OUTPUTS
-save_to_file(final_benchmark_results, args.output_file, args.output_format)
+run_time_features = get_current_runtime_features()
+save_to_file(final_benchmark_results, args.output_file, args.output_format, run_time_features)

# 4. Generate list of MXNet operators not covered in benchmarks
ops_not_covered = get_operators_with_no_benchmark(final_benchmark_results.keys())
for idx, op in enumerate(ops_not_covered):
print(f"{idx}. {op}")

+return 0


if __name__ == '__main__':
sys.exit(main())

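For reference, a sketch of driving the benchmarks from Python rather than the CLI; run_all_mxnet_operator_benchmarks and its signature appear in opperf.py above, and the sketch assumes it is run from the MXNet source root so the benchmark package is importable:

```python
import mxnet as mx
from benchmark.opperf.opperf import run_all_mxnet_operator_benchmarks

# Roughly what `python benchmark/opperf/opperf.py --ctx cpu --dtype float32`
# does, minus the file output and runtime-feature reporting in main().
results = run_all_mxnet_operator_benchmarks(ctx=mx.cpu(), dtype='float32')
print(len(results), "operator benchmarks collected")
```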