From c6ac555d0f8d054a35a91f14c83d8cb3492944ba Mon Sep 17 00:00:00 2001
From: Rohit Kumar Srivastava <srivastava.141@buckeyemail.osu.edu>
Date: Mon, 14 Oct 2019 22:00:52 +0000
Subject: [PATCH] adding large tensor support for add_n and tests for more ops

---
 src/operator/tensor/elemwise_sum.h |  8 ++---
 tests/nightly/test_large_array.py  | 54 ++++++++++++++++++++++++++++++
 tests/nightly/test_large_vector.py | 54 ++++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+), 4 deletions(-)
diff --git a/src/operator/tensor/elemwise_sum.h b/src/operator/tensor/elemwise_sum.h
index 08f57d15fbf8..e89e9d799903 100644
--- a/src/operator/tensor/elemwise_sum.h
+++ b/src/operator/tensor/elemwise_sum.h
@@ -39,15 +39,15 @@ namespace op {
 
 struct Sum {
   template<typename DType>
-  MSHADOW_XINLINE static DType sum(int i, const DType* a) {
+  MSHADOW_XINLINE static DType sum(index_t i, const DType* a) {
     return a[i];
   }
   template<typename DType, typename... DTypes>
-  MSHADOW_XINLINE static DType sum(int i, const DType* a, const DTypes... b) {
+  MSHADOW_XINLINE static DType sum(index_t i, const DType* a, const DTypes... b) {
     return a[i] + sum(i, b...);
   }
   template<typename DType, typename... DTypes>
-  MSHADOW_XINLINE static void Map(int i, DType* out, const OpReqType req, const DType* in0,
+  MSHADOW_XINLINE static void Map(index_t i, DType* out, const OpReqType req, const DType* in0,
     const DTypes... ins) {
     KERNEL_ASSIGN(out[i], req, sum(i, in0, ins...));
   }
@@ -64,7 +64,7 @@ void ElementWiseSumCompute_(const nnvm::NodeAttrs& attrs,
   size_t size = in_data.size();
   Stream<xpu> *s = ctx.get_stream<xpu>();
   DType* out_dptr = out_data[0].dptr<DType>();
-  int out_size = static_cast<int>((out_data[0].Size() + DataType<DType>::kLanes - 1)
+  index_t out_size = static_cast<index_t>((out_data[0].Size() + DataType<DType>::kLanes - 1)
                                   /DataType<DType>::kLanes);
   switch (size) {
     case 2: {
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index 7ab1d025b75b..14befb272486 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+import tempfile
 import math
 import numpy as np
 import mxnet as mx
@@ -1439,6 +1441,58 @@ def npy_instance_norm(data, gamma, beta, axis, eps=1E-5):
                         forward_check_eps)
 
 
+def test_load_save():
+    """Round-trip a large 2-D tensor via nd.save/nd.load; compare its corners."""
+    x = create_2d_tensor(SMALL_Y, LARGE_X)
+    with tempfile.TemporaryDirectory() as tmp:  # removed on exit, no leaked file
+        tmpfile = os.path.join(tmp, 'large_tensor')
+        nd.save(tmpfile, [x])
+        y = nd.load(tmpfile)[0]
+    assert x[0][0] == y[0][0]
+    assert x[-1][-1] == y[-1][-1]
+
+
+def test_add_n():
+    """Sum SMALL_Y all-ones arrays of size LARGE_X; check first and last sums."""
+    total = nd.add_n(*[nd.ones(LARGE_X) for _ in range(SMALL_Y)])
+    for idx in (0, -1):
+        assert total[idx] == SMALL_Y
+
+
+def test_modulo():
+    """Check ``%`` and ``nd.modulo`` of 6s and 5s against a 1-D divisor of 4s."""
+    rhs = mx.nd.ones(LARGE_X) * 4
+    lhs = mx.nd.ones((SMALL_Y, LARGE_X)) * 6
+    out = lhs % rhs
+    assert out[0][0] == 2 and out[-1][-1] == 2
+    # Rebind lhs/out rather than introducing new names for the second case.
+    lhs = mx.nd.ones((SMALL_Y, LARGE_X)) * 5
+    out = nd.modulo(lhs, rhs)
+    assert out[0][0] == 1 and out[-1][-1] == 1
+
+
+def test_maximum():
+    """Check ``nd.maximum`` of an all-3s array against 4s and against scalar 5."""
+    lhs = mx.nd.ones((SMALL_Y, LARGE_X)) * 3
+    rhs = mx.nd.ones(LARGE_X) * 4
+    out = nd.maximum(lhs, rhs)
+    assert out[0][0] == 4 and out[-1][-1] == 4
+    # Same left operand, scalar right operand.
+    out = nd.maximum(lhs, 5)
+    assert out[0][0] == 5 and out[-1][-1] == 5
+
+
+def test_minimum():
+    """Check ``nd.minimum`` of an all-3s array against 2s and against scalar 5."""
+    lhs = mx.nd.ones((SMALL_Y, LARGE_X)) * 3
+    rhs = mx.nd.ones(LARGE_X) * 2
+    out = nd.minimum(lhs, rhs)
+    assert out[0][0] == 2 and out[-1][-1] == 2
+    # Same left operand, scalar right operand.
+    out = nd.minimum(lhs, 5)
+    assert out[0][0] == 3 and out[-1][-1] == 3
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
diff --git a/tests/nightly/test_large_vector.py b/tests/nightly/test_large_vector.py
index 75a902ce221f..53a8bd07ff78 100644
--- a/tests/nightly/test_large_vector.py
+++ b/tests/nightly/test_large_vector.py
@@ -15,6 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
+import tempfile
 import math
 import numpy as np
 import mxnet as mx
@@ -876,6 +878,58 @@ def check_degrees():
     check_degrees()
 
 
+def test_load_save():
+    """Round-trip a LARGE_X-sized vector via nd.save/nd.load; compare its ends."""
+    x = create_vector(size=LARGE_X)
+    with tempfile.TemporaryDirectory() as tmp:  # removed on exit, no leaked file
+        tmpfile = os.path.join(tmp, 'large_vector')
+        nd.save(tmpfile, [x])
+        y = nd.load(tmpfile)[0]
+    assert x[0] == y[0]
+    assert x[-1] == y[-1]
+
+
+def test_add_n():
+    """Run ``nd.add_n`` on a single all-ones array of size LARGE_X."""
+    total = nd.add_n(nd.ones(LARGE_X))
+    for idx in (0, -1):
+        assert total[idx] == 1
+
+
+def test_modulo():
+    """Check ``%`` and ``nd.modulo`` of 6s and 5s against a divisor of 4s."""
+    rhs = mx.nd.ones(LARGE_X) * 4
+    lhs = mx.nd.ones(LARGE_X) * 6
+    out = lhs % rhs
+    assert out[0] == 2 and out[-1] == 2
+    # Rebind lhs/out rather than introducing new names for the second case.
+    lhs = mx.nd.ones(LARGE_X) * 5
+    out = nd.modulo(lhs, rhs)
+    assert out[0] == 1 and out[-1] == 1
+
+
+def test_maximum():
+    """Check ``nd.maximum`` of an all-3s array against 4s and against scalar 5."""
+    lhs = mx.nd.ones(LARGE_X) * 3
+    rhs = mx.nd.ones(LARGE_X) * 4
+    out = nd.maximum(lhs, rhs)
+    assert out[0] == 4 and out[-1] == 4
+    # Same left operand, scalar right operand.
+    out = nd.maximum(lhs, 5)
+    assert out[0] == 5 and out[-1] == 5
+
+
+def test_minimum():
+    """Check ``nd.minimum`` of an all-3s array against 2s and against scalar 5."""
+    lhs = mx.nd.ones(LARGE_X) * 3
+    rhs = mx.nd.ones(LARGE_X) * 2
+    out = nd.minimum(lhs, rhs)
+    assert out[0] == 2 and out[-1] == 2
+    # Same left operand, scalar right operand.
+    out = nd.minimum(lhs, 5)
+    assert out[0] == 3 and out[-1] == 3
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()