[Large Tensor] Implemented LT flag for OpPerf testing (apache#17449)
* Passing large_tensor parameter down

* Adding large tensor testing functionality for convolutional operators

* Added large tensor test functionality for conv ops

* Fixing sizing for conv ops

* Added gemm large tensor, print on conv

* Updated input for gemm ops and print statements

* Fixed deconv large tensor test

* Added bias for deconv

* Added test functionality for nn_activation and nn_basic ops

* Fixed deconv bias, implemented large tensor test logic for general ops, added default data for large tensor test

* Dropped unnecessary print statements

* Fixed lint errors

* Added large_tensor parameter to existing function descriptions, added descriptions for functions missing descriptions

* Adding docs, changed large_tensor to int64_tensor for clarity

* Added warmup/runs to gemm ops, debugging process failure

* Resolved merge conflicts, added default params and input switching functionality

* Dynamic input handling for default inputs, additional custom data for int64

* Fixed RPD issue

* Everything through reduction ops working

* Random sampling & loss ops working

* Added indices, depth, ravel_data in default_params

* Added indexing ops - waiting for merge on ravel

* Added optimizer ops

* All misc ops working

* All NN Basic ops working

* Fixed LT input for ROIPooling

* Refactored NN Conv tests

* Added test for inline optimizer ops

* Dropping extra tests to decrease execution time

* Switching to inline tests for RNN to support additional modes

* Added state_cell as NDArray param, removed linalg testing for int64 tensor

* Cleaned up styling

* Fixed conv and deconv tests

* Retrigger CI for continuous build

* Cleaned up GEMM op inputs

* Dropped unused param from default_params
connorgoggins authored and MoisesHer committed Apr 10, 2020
1 parent e6fd114 commit 2704f1b
Showing 19 changed files with 941 additions and 253 deletions.
8 changes: 5 additions & 3 deletions benchmark/opperf/nd_operations/array_rearrange.py
@@ -29,8 +29,8 @@
 """
 
 
-def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the
+def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the
     rearrange operators in MXNet.
 
     Parameters
@@ -41,6 +41,8 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -55,5 +55,5 @@ def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='
     mx_rearrange_ops = get_all_rearrange_operators()
 
     # Run benchmarks
-    mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, warmup, runs)
+    mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return mx_rearrange_op_results
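
This file is representative of the whole change: each benchmark runner gains an int64_tensor='off' keyword, documents it, and forwards it to run_op_benchmarks. A minimal usage sketch, assuming MXNet is installed and the repository root is on PYTHONPATH (the function and its signature come from the diff above):

import mxnet as mx

from benchmark.opperf.nd_operations.array_rearrange import run_rearrange_operators_benchmarks

# int64_tensor='on' switches all operators to inputs with dimensions
# >= 2**32, so expect long runtimes and heavy memory use; keep
# warmup/runs small for a smoke test.
results = run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                             profiler='native',
                                             int64_tensor='on',
                                             warmup=1, runs=5)
print(results)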
26 changes: 17 additions & 9 deletions benchmark/opperf/nd_operations/binary_operators.py
@@ -38,8 +38,8 @@
     get_all_elemen_wise_binary_operators, get_all_misc_binary_operators
 
 
-def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the miscellaneous
+def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the miscellaneous
     binary operators in MXNet.
 
     Parameters
@@ -48,6 +48,10 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profi
         Context to run benchmarks
     dtype: str, default 'float32'
         Precision to use for benchmarks
+    profiler: str, default 'native'
+        Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -61,12 +65,12 @@ def run_mx_binary_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profi
     # Fetch all Miscellaneous Binary Operators
     mx_binary_misc_ops = get_all_misc_binary_operators()
     # Run benchmarks
-    mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx, profiler, warmup, runs)
+    mx_binary_op_results = run_op_benchmarks(mx_binary_misc_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return mx_binary_op_results
 
 
-def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the binary
+def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the binary
     broadcast operators in MXNet.
 
     Parameters
@@ -77,6 +81,8 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -90,12 +96,12 @@ def run_mx_binary_broadcast_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
     # Fetch all Binary Broadcast Operators
     mx_binary_broadcast_ops = get_all_broadcast_binary_operators()
     # Run benchmarks
-    mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, profiler, warmup, runs)
+    mx_binary_op_results = run_op_benchmarks(mx_binary_broadcast_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return mx_binary_op_results
 
 
-def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the binary
+def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the binary
     element_wise operators in MXNet.
 
     Parameters
@@ -106,6 +112,8 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 10
         Number of times to run for warmup
     runs: int, default 50
@@ -119,5 +127,5 @@ def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32
     # Fetch all Binary Element_wise Operators
     mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
     # Run benchmarks
-    mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, profiler, warmup, runs)
+    mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return mx_binary_op_results
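
The docstrings pin 'on' to dimensions >= 2**32 because that is the point where an element count no longer fits in a signed 32-bit index, which is what MXNet's large-tensor (int64) build exists to handle. A quick check of the threshold (plain Python arithmetic, not code from the PR):

# Why 2**32 is the cutoff: element counts at or above it overflow a
# signed 32-bit index, so they exercise the int64 indexing paths.
INT32_MAX = 2**31 - 1        # 2,147,483,647
print(2**32)                 # 4,294,967,296
print(2**32 > INT32_MAX)     # True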
84 changes: 59 additions & 25 deletions benchmark/opperf/nd_operations/gemm_operators.py
@@ -35,8 +35,8 @@
 """
 
 
-def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype)for all the GEMM
+def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the GEMM
     operators (dot, batch_dot, khatri_rao) in MXNet.
 
     Parameters
@@ -47,6 +47,8 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -57,43 +59,75 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
         Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
     """
+    # Benchmark tests for dot operator
+    standard_inputs_dot = [{"lhs": (1024, 1024),
+                            "rhs": (1024, 1024)},
+                           {"lhs": (1000, 10),
+                            "rhs": (1000, 10),
+                            "transpose_b": True},
+                           {"lhs": (1000, 1),
+                            "rhs": (100, 1000),
+                            "transpose_a": True,
+                            "transpose_b": True}]
+    int64_tensor_inputs_dot = [{"lhs": (2**16, 2**16),
+                                "rhs": (2**16, 2**16)},
+                               {"lhs": (4, 2**30),
+                                "rhs": (4, 2**30),
+                                "transpose_b": True},
+                               {"lhs": (2**28, 16),
+                                "rhs": (16, 2**28),
+                                "transpose_a": True,
+                                "transpose_b": True}]
+    standard_inputs_batch_dot = [{"lhs": (32, 1024, 1024),
+                                  "rhs": (32, 1024, 1024)},
+                                 {"lhs": (32, 1000, 10),
+                                  "rhs": (32, 1000, 10),
+                                  "transpose_b": True},
+                                 {"lhs": (32, 1000, 1),
+                                  "rhs": (32, 100, 1000),
+                                  "transpose_a": True,
+                                  "transpose_b": True}]
+    int64_tensor_inputs_batch_dot = [{"lhs": (1, 2**16, 2**16),
+                                      "rhs": (1, 2**16, 2**16)},
+                                     {"lhs": (1, 4, 2**30),
+                                      "rhs": (1, 4, 2**30),
+                                      "transpose_b": True},
+                                     {"lhs": (1, 2**28, 16),
+                                      "rhs": (1, 16, 2**28),
+                                      "transpose_a": True,
+                                      "transpose_b": True}]
+    standard_inputs_khatri_rao = [{"args": [(32, 32), (32, 32)]},
+                                  {"args": [(64, 64), (64, 64)]}]
+    int64_tensor_inputs_khatri_rao = [{"args": [(2**32, 1), (2**32, 1)]}]
+
+    if int64_tensor == 'on':
+        inputs_dot = int64_tensor_inputs_dot
+        inputs_batch_dot = int64_tensor_inputs_batch_dot
+        inputs_khatri_rao = int64_tensor_inputs_khatri_rao
+    else:
+        inputs_dot = standard_inputs_dot
+        inputs_batch_dot = standard_inputs_batch_dot
+        inputs_khatri_rao = standard_inputs_khatri_rao
+
     # Benchmark tests for dot and batch_dot operators
     dot_benchmark_res = run_performance_test(
         [getattr(MX_OP_MODULE, "dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
-        inputs=[{"lhs": (1024, 1024),
-                 "rhs": (1024, 1024)},
-                {"lhs": (1000, 10),
-                 "rhs": (1000, 10),
-                 "transpose_b": True},
-                {"lhs": (1000, 1),
-                 "rhs": (100, 1000),
-                 "transpose_a": True,
-                 "transpose_b": True}],
+        inputs=inputs_dot,
         warmup=warmup, runs=runs, profiler=profiler)
-    # Benchmark tests for batch_dot operator
 
     batch_dot_benchmark_res = run_performance_test(
         [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
-        inputs=[{"lhs": (32, 1024, 1024),
-                 "rhs": (32, 1024, 1024)},
-                {"lhs": (32, 1000, 10),
-                 "rhs": (32, 1000, 10),
-                 "transpose_b": True},
-                {"lhs": (32, 1000, 1),
-                 "rhs": (32, 100, 1000),
-                 "transpose_a": True,
-                 "transpose_b": True}],
+        inputs=inputs_batch_dot,
         warmup=warmup, runs=runs, profiler=profiler)
-    # Operator khatri_rao is not yet implemented for GPU
+    # Operator khatri_rao is not yet implemented for GPU
     khatri_rao_benchmark_res = []
     if ctx != mx.gpu():
         # Benchmark tests for khatri_rao operator
         khatri_rao_benchmark_res = run_performance_test(
             [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
             dtype=dtype, ctx=ctx,
-            inputs=[{"args": [(32, 32), (32, 32)]},
-                    {"args": [(64, 64), (64, 64)]}],
+            inputs=inputs_khatri_rao,
             warmup=warmup, runs=runs, profiler=profiler)
 
     # Prepare combined results for GEMM operators
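
The GEMM diff shows the full pattern in one place: each operator gets a standard input list and an int64 input list, and the flag picks one. The int64 shapes keep each dimension modest while pushing total element counts to at least 2**32 (for example, 2**16 * 2**16 = 2**32, and (4, 2**30) likewise gives 2**32 elements). Distilled to a sketch (the select_inputs helper is hypothetical, not code from the PR):

# Distilled form of the input-switching pattern above. The select_inputs
# helper is hypothetical (not part of the PR); the shapes are taken from
# the diff.
def select_inputs(standard_inputs, int64_tensor_inputs, int64_tensor='off'):
    return int64_tensor_inputs if int64_tensor == 'on' else standard_inputs

inputs_dot = select_inputs(
    standard_inputs=[{"lhs": (1024, 1024), "rhs": (1024, 1024)}],
    int64_tensor_inputs=[{"lhs": (2**16, 2**16), "rhs": (2**16, 2**16)}],
    int64_tensor='on')   # selects the 2**16 x 2**16 (2**32-element) inputs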
8 changes: 5 additions & 3 deletions benchmark/opperf/nd_operations/indexing_routines.py
@@ -35,8 +35,8 @@
 """
 
 
-def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the indexing routines
+def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and data size (int64_tensor) for all the indexing routines
     in MXNet.
 
     Parameters
@@ -47,6 +47,8 @@ def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='na
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -61,5 +63,5 @@ def run_indexing_routines_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='na
     mx_indexing_ops = get_all_indexing_routines()
 
     # Run benchmarks
-    mx_indexing_op_results = run_op_benchmarks(mx_indexing_ops, dtype, ctx, profiler, warmup, runs)
+    mx_indexing_op_results = run_op_benchmarks(mx_indexing_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return mx_indexing_op_results
8 changes: 5 additions & 3 deletions benchmark/opperf/nd_operations/linalg_operators.py
@@ -34,8 +34,8 @@
 from benchmark.opperf.utils.common_utils import merge_map_list
 from benchmark.opperf.rules.default_params import MX_OP_MODULE
 
-def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
-    """Runs benchmarks with the given context and precision (dtype) for all the linear algebra
+def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
+    """Runs benchmarks with the given context, precision (dtype), and data size (int64_tensor) for all the linear algebra
     operators in MXNet.
 
     Parameters
@@ -46,6 +46,8 @@ def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nat
         Precision to use for benchmarks
     profiler: str, default 'native'
         Type of Profiler to use (native/python)
+    int64_tensor: str, default 'off'
+        Input tensor size to use for tests (if on, dimensions >= 2**32)
     warmup: int, default 25
         Number of times to run for warmup
     runs: int, default 100
@@ -74,5 +76,5 @@ def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nat
     # Fetch all Linear Algebra Operators
     mx_linalg_ops = get_all_linalg_operators()
     # Run benchmarks
-    mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx, profiler, warmup, runs)
+    mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
     return merge_map_list(linalg_potrf_benchmark + [mx_linalg_op_results])
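
Every runner ends in the same run_op_benchmarks call, with int64_tensor inserted as the fifth positional argument; that is why each of the 19 changed files needed the same small edit. A sketch of driving the shared helper directly (the argument order is taken from the diffs above; the two utils import paths are assumptions based on this file's other imports):

import mxnet as mx

# Import paths assumed from the repository layout shown in the diffs.
from benchmark.opperf.utils.benchmark_utils import run_op_benchmarks
from benchmark.opperf.utils.op_registry_utils import get_all_linalg_operators

# Positional order taken from the diffs: ops, dtype, ctx, profiler,
# int64_tensor, warmup, runs.
mx_linalg_ops = get_all_linalg_operators()
results = run_op_benchmarks(mx_linalg_ops, 'float32', mx.cpu(), 'native',
                            'off', 25, 100)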