From 1c60e655c9b5d64d6bcd5a066b6b2a1529d24db2 Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Fri, 31 Jan 2020 16:10:47 -0800
Subject: [PATCH 1/3] Added run_perf_test call for khatri_rao, added supporting comments

---
 benchmark/opperf/nd_operations/gemm_operators.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/benchmark/opperf/nd_operations/gemm_operators.py b/benchmark/opperf/nd_operations/gemm_operators.py
index 73337538d89f..6c89576d867b 100644
--- a/benchmark/opperf/nd_operations/gemm_operators.py
+++ b/benchmark/opperf/nd_operations/gemm_operators.py
@@ -23,6 +23,7 @@
 
 1. dot
 2. batch_dot
+3. khatri_rao
 
 TODO
 3. As part of default tests, following needs to be added:
@@ -36,7 +37,7 @@
 
 def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
     """Runs benchmarks with the given context and precision (dtype)for all the GEMM
-    operators (dot, batch_dot) in MXNet.
+    operators (dot, batch_dot, khatri_rao) in MXNet.
 
     Parameters
     ----------
@@ -54,7 +55,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
         Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.
 
     """
-    # Benchmark tests for dot and batch_dot operators
+    # Benchmark tests for dot operator
     dot_benchmark_res = run_performance_test(
         [getattr(MX_OP_MODULE, "dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
@@ -68,7 +69,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
                  "transpose_a": True,
                  "transpose_b": True}],
         warmup=warmup, runs=runs, profiler=profiler)
-
+    # Benchmark tests for batch_dot operator
     batch_dot_benchmark_res = run_performance_test(
         [getattr(MX_OP_MODULE, "batch_dot")], run_backward=True,
         dtype=dtype, ctx=ctx,
@@ -82,7 +83,14 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
                  "transpose_a": True,
                  "transpose_b": True}],
         warmup=warmup, runs=runs, profiler=profiler)
+    # Benchmark tests for khatri_rao operator
+    khatri_rao_benchmark_res = run_performance_test(
+        [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
+        dtype=dtype, ctx=ctx,
+        inputs=[{"args": [(32, 32), (32, 32)]},
+                {"args": [(64, 64), (64, 64)]}],
+        warmup=warmup, runs=runs, profiler=profiler)
 
     # Prepare combined results for GEMM operators
-    mx_gemm_op_results = merge_map_list(dot_benchmark_res + batch_dot_benchmark_res)
+    mx_gemm_op_results = merge_map_list(dot_benchmark_res + batch_dot_benchmark_res + khatri_rao_benchmark_res)
     return mx_gemm_op_results

From a5c5c5f73481726a947c33868225ef1805286d4f Mon Sep 17 00:00:00 2001
From: Connor Goggins
Date: Fri, 31 Jan 2020 16:26:38 -0800
Subject: [PATCH 2/3] Disabled khatri_rao on GPU as it has not yet been implemented

---
 benchmark/opperf/nd_operations/gemm_operators.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/benchmark/opperf/nd_operations/gemm_operators.py b/benchmark/opperf/nd_operations/gemm_operators.py
index 6c89576d867b..b51b51c30993 100644
--- a/benchmark/opperf/nd_operations/gemm_operators.py
+++ b/benchmark/opperf/nd_operations/gemm_operators.py
@@ -83,13 +83,15 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ
                  "transpose_a": True,
                  "transpose_b": True}],
         warmup=warmup, runs=runs, profiler=profiler)
-    # Benchmark tests for khatri_rao operator
-    khatri_rao_benchmark_res = run_performance_test(
-        [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False,
-        dtype=dtype, ctx=ctx,
-        inputs=[{"args": [(32, 32), (32, 32)]},
{"args": [(64, 64), (64, 64)]}], - warmup=warmup, runs=runs, profiler=profiler) + # Operator khatri_rao is not yet implemented for GPU + if ctx != mx.gpu(): + # Benchmark tests for khatri_rao operator + khatri_rao_benchmark_res = run_performance_test( + [getattr(MX_OP_MODULE, "khatri_rao")], run_backward=False, + dtype=dtype, ctx=ctx, + inputs=[{"args": [(32, 32), (32, 32)]}, + {"args": [(64, 64), (64, 64)]}], + warmup=warmup, runs=runs, profiler=profiler) # Prepare combined results for GEMM operators mx_gemm_op_results = merge_map_list(dot_benchmark_res + batch_dot_benchmark_res + khatri_rao_benchmark_res) From 43ddcfa6575afc6a839938af4e14e87083bb23c5 Mon Sep 17 00:00:00 2001 From: Connor Goggins Date: Fri, 31 Jan 2020 16:29:47 -0800 Subject: [PATCH 3/3] Fixed reference pre-assignment --- benchmark/opperf/nd_operations/gemm_operators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmark/opperf/nd_operations/gemm_operators.py b/benchmark/opperf/nd_operations/gemm_operators.py index b51b51c30993..b0d6169c6401 100644 --- a/benchmark/opperf/nd_operations/gemm_operators.py +++ b/benchmark/opperf/nd_operations/gemm_operators.py @@ -84,6 +84,7 @@ def run_gemm_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='nativ "transpose_b": True}], warmup=warmup, runs=runs, profiler=profiler) # Operator khatri_rao is not yet implemented for GPU + khatri_rao_benchmark_res = [] if ctx != mx.gpu(): # Benchmark tests for khatri_rao operator khatri_rao_benchmark_res = run_performance_test(