From fec6fb215d371579ea56ea1ad8990905c5b0b67e Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Fri, 28 Feb 2020 14:56:45 -0800
Subject: [PATCH] Cleaned up GEMM op inputs

---
 .../opperf/nd_operations/gemm_operators.py | 137 +++++++++---------
 1 file changed, 65 insertions(+), 72 deletions(-)

diff --git a/benchmark/opperf/nd_operations/gemm_operators.py b/benchmark/opperf/nd_operations/gemm_operators.py
index d0d51422721c..55b3435a8f24 100644
--- a/benchmark/opperf/nd_operations/gemm_operators.py
+++ b/benchmark/opperf/nd_operations/gemm_operators.py
@@ -59,83 +59,76 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
     Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
 
     """
-    # Benchmark tests for dot and batch_dot operators
-    if int64_tensor == "on":
-        dot_benchmark_res = run_performance_test(
-            [getattr(MX_OP_MODULE, "dot")], run_backward=True,
-            dtype=dtype, ctx=ctx,
-            inputs=[{"lhs": (2**16, 2**16),
-                     "rhs": (2**16, 2**16)},
-                    {"lhs": (4, 2**30),
-                     "rhs": (4, 2**30),
-                     "transpose_b": True},
-                    {"lhs": (2**28, 16),
-                     "rhs": (16, 2**28),
-                     "transpose_a": True,
-                     "transpose_b": True}],
-            warmup=warmup, runs=runs, profiler=profiler)
+    standard_inputs_dot = [{"lhs": (1024, 1024),
+                            "rhs": (1024, 1024)},
+                           {"lhs": (1000, 10),
+                            "rhs": (1000, 10),
+                            "transpose_b": True},
+                           {"lhs": (1000, 1),
+                            "rhs": (100, 1000),
+                            "transpose_a": True,
+                            "transpose_b": True}]
+    int64_tensor_inputs_dot = [{"lhs": (2**16, 2**16),
+                                "rhs": (2**16, 2**16)},
+                               {"lhs": (4, 2**30),
+                                "rhs": (4, 2**30),
+                                "transpose_b": True},
+                               {"lhs": (2**28, 16),
+                                "rhs": (16, 2**28),
+                                "transpose_a": True,
+                                "transpose_b": True}]
+    standard_inputs_batch_dot = [{"lhs": (32, 1024, 1024),
+                                  "rhs": (32, 1024, 1024)},
+                                 {"lhs": (32, 1000, 10),
+                                  "rhs": (32, 1000, 10),
+                                  "transpose_b": True},
+                                 {"lhs": (32, 1000, 1),
+                                  "rhs": (32, 100, 1000),
+                                  "transpose_a": True,
+                                  "transpose_b": True}]
+    int64_tensor_inputs_batch_dot = [{"lhs": (1, 2**16, 2**16),
+                                      "rhs": (1, 2**16, 2**16)},
+                                     {"lhs": (1, 4, 2**30),
+                                      "rhs": (1, 4, 2**30),
+                                      "transpose_b": True},
+                                     {"lhs": (1, 2**28, 16),
+                                      "rhs": (1, 16, 2**28),
+                                      "transpose_a": True,
+                                      "transpose_b": True}]
+    standard_inputs_khatri_rao = [{"args": [(32, 32), (32, 32)]},
+                                  {"args": [(64, 64), (64, 64)]}]
+    int64_tensor_inputs_khatri_rao = [{"args": [(2**32, 1), (2**32, 1)]}]
 
-        batch_dot_benchmark_res = run_performance_test(
-            [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
-            dtype=dtype, ctx=ctx,
-            inputs=[{"lhs": (1, 2**16, 2**16),
-                     "rhs": (1, 2**16, 2**16)},
-                    {"lhs": (1, 4, 2**30),
-                     "rhs": (1, 4, 2**30),
-                     "transpose_b": True},
-                    {"lhs": (1, 2**28, 16),
-                     "rhs": (1, 16, 2**28),
-                     "transpose_a": True,
-                     "transpose_b": True}],
-            warmup=warmup, runs=runs, profiler=profiler)
-        # Operator khatri_rao is not yet implemented for GPU
-        khatri_rao_benchmark_res = []
-        if ctx != mx.gpu():
-            # Benchmark tests for khatri_rao operator
-            khatri_rao_benchmark_res = run_performance_test(
-                [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
-                dtype=dtype, ctx=ctx,
-                inputs=[{"args": [(32, 32), (32, 32)]},
-                        {"args": [(64, 64), (64, 64)]}],
-                warmup=warmup, runs=runs, profiler=profiler)
+    if int64_tensor == 'on':
+        inputs_dot = int64_tensor_inputs_dot
+        inputs_batch_dot = int64_tensor_inputs_batch_dot
+        inputs_khatri_rao = int64_tensor_inputs_khatri_rao
     else:
-        dot_benchmark_res = run_performance_test(
-            [getattr(MX_OP_MODULE, "dot")], run_backward=True,
-            dtype=dtype, ctx=ctx,
-            inputs=[{"lhs": (1024, 1024),
-                     "rhs": (1024, 1024)},
-                    {"lhs": (1000, 10),
-                     "rhs": (1000, 10),
-                     "transpose_b": True},
-                    {"lhs": (1000, 1),
-                     "rhs": (100, 1000),
-                     "transpose_a": True,
-                     "transpose_b": True}],
-            warmup=warmup, runs=runs, profiler=profiler)
+        inputs_dot = standard_inputs_dot
+        inputs_batch_dot = standard_inputs_batch_dot
+        inputs_khatri_rao = standard_inputs_khatri_rao
+
+    # Benchmark tests for dot and batch_dot operators
+    dot_benchmark_res = run_performance_test(
+        [getattr(MX_OP_MODULE, "dot")], run_backward=True,
+        dtype=dtype, ctx=ctx,
+        inputs=inputs_dot,
+        warmup=warmup, runs=runs, profiler=profiler)
 
-        batch_dot_benchmark_res = run_performance_test(
-            [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
+    batch_dot_benchmark_res = run_performance_test(
+        [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
+        dtype=dtype, ctx=ctx,
+        inputs=inputs_batch_dot,
+        warmup=warmup, runs=runs, profiler=profiler)
+    # Operator khatri_rao is not yet implemented for GPU
+    khatri_rao_benchmark_res = []
+    if ctx != mx.gpu():
+        # Benchmark tests for khatri_rao operator
+        khatri_rao_benchmark_res = run_performance_test(
+            [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
             dtype=dtype, ctx=ctx,
-            inputs=[{"lhs": (32, 1024, 1024),
-                     "rhs": (32, 1024, 1024)},
-                    {"lhs": (32, 1000, 10),
-                     "rhs": (32, 1000, 10),
-                     "transpose_b": True},
-                    {"lhs": (32, 1000, 1),
-                     "rhs": (32, 100, 1000),
-                     "transpose_a": True,
-                     "transpose_b": True}],
+            inputs=inputs_khatri_rao,
             warmup=warmup, runs=runs, profiler=profiler)
-        # Operator khatri_rao is not yet implemented for GPU
-        khatri_rao_benchmark_res = []
-        if ctx != mx.gpu():
-            # Benchmark tests for khatri_rao operator
-            khatri_rao_benchmark_res = run_performance_test(
-                [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
-                dtype=dtype, ctx=ctx,
-                inputs=[{"args": [(32, 32), (32, 32)]},
-                        {"args": [(64, 64), (64, 64)]}],
-                warmup=warmup, runs=runs, profiler=profiler)
 
     # Prepare combined results for GEMM operators
     mx_gemm_op_results = merge_map_list(dot_benchmark_res + batch_dot_benchmark_res + khatri_rao_benchmark_res)