diff --git a/src/operator/numpy/np_cumsum-inl.h b/src/operator/numpy/np_cumsum-inl.h
index 375d83b2240f..65e658115dc4 100644
--- a/src/operator/numpy/np_cumsum-inl.h
+++ b/src/operator/numpy/np_cumsum-inl.h
@@ -60,17 +60,17 @@ struct CumsumParam : public dmlc::Parameter<CumsumParam> {
 
 struct cumsum_forward {
   template<typename IType, typename OType>
-  MSHADOW_XINLINE static void Map(int i,
+  MSHADOW_XINLINE static void Map(index_t i,
                                   OType *out,
                                   const IType *in,
-                                  const int middle,
-                                  const int trailing) {
-    int left = i / trailing, right = i % trailing;
-    int offset = left * middle * trailing + right;
+                                  const index_t middle,
+                                  const index_t trailing) {
+    index_t left = i / trailing, right = i % trailing;
+    index_t offset = left * middle * trailing + right;
     const IType *lane_in = in + offset;
     OType *lane_out = out + offset;
     lane_out[0] = OType(lane_in[0]);
-    for (int j = 1; j < middle; ++j) {
+    for (index_t j = 1; j < middle; ++j) {
       lane_out[j * trailing] = lane_out[(j - 1) * trailing] + OType(lane_in[j * trailing]);
     }
   }
@@ -125,17 +125,17 @@ void CumsumForward(const nnvm::NodeAttrs& attrs,
 
 struct cumsum_backward {
   template<typename IType, typename OType>
-  MSHADOW_XINLINE static void Map(int i,
+  MSHADOW_XINLINE static void Map(index_t i,
                                   IType *igrad,
                                   const OType *ograd,
-                                  const int middle,
-                                  const int trailing) {
-    int left = i / trailing, right = i % trailing;
-    int offset = left * middle * trailing + right;
+                                  const index_t middle,
+                                  const index_t trailing) {
+    index_t left = i / trailing, right = i % trailing;
+    index_t offset = left * middle * trailing + right;
     const OType *lane_ograd = ograd + offset;
     IType *lane_igrad = igrad + offset;
     lane_igrad[(middle - 1) * trailing] = IType(lane_ograd[(middle - 1) * trailing]);
-    for (int j = middle - 2; j >= 0; --j) {
+    for (index_t j = middle - 2; j >= 0; --j) {
       lane_igrad[j * trailing] = lane_igrad[(j + 1) * trailing] + IType(lane_ograd[j * trailing]);
     }
   }
diff --git a/tests/nightly/test_large_array.py b/tests/nightly/test_large_array.py
index ee57f172c1c9..222c4525ae50 100644
--- a/tests/nightly/test_large_array.py
+++ b/tests/nightly/test_large_array.py
@@ -504,6 +504,16 @@ def check_ravel():
 
         assert out.shape[0] == LARGE_TENSOR_SHAPE
 
+    def check_cumsum():
+        a = nd.ones((LARGE_X, SMALL_Y))
+        axis = 1
+
+        res = nd.cumsum(a=a, axis=axis)
+
+        assert res.shape[0] == LARGE_X
+        assert res.shape[1] == SMALL_Y
+        assert res[0][SMALL_Y - 1] == 50.
+
     check_gluon_embedding()
     check_fully_connected()
     check_dense()
@@ -527,6 +537,7 @@ def check_ravel():
     check_embedding()
     check_spatial_transformer()
     check_ravel()
+    check_cumsum()
 
 
 def test_tensor():
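
For context (a sketch, not part of the patch): the int -> index_t widening in the two Map kernels is what lets the new nightly check pass, since offset = left * middle * trailing + right overflows a 32-bit int once the flattened input exceeds 2**31 - 1 elements. A minimal standalone version of the same check follows; the LARGE_X = 100000000 and SMALL_Y = 50 values are assumptions inferred from the final "== 50." assertion, not defined in this patch.

# Standalone sketch of check_cumsum (assumed constants; mirrors the nightly test above).
from mxnet import nd

LARGE_X = 100000000  # assumption: nightly large-tensor row count
SMALL_Y = 50         # assumption: implied by the "assert ... == 50." line

a = nd.ones((LARGE_X, SMALL_Y))   # 5e9 elements, beyond the 32-bit index range
res = nd.cumsum(a=a, axis=1)      # cumulative sum along the short axis

assert res.shape == (LARGE_X, SMALL_Y)
# each row is all ones, so its cumulative sum ends at the row length
assert res[0][SMALL_Y - 1] == float(SMALL_Y)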