diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py
index cdab800aa6e7..a83447f64952 100644
--- a/python/mxnet/_numpy_op_doc.py
+++ b/python/mxnet/_numpy_op_doc.py
@@ -523,6 +523,7 @@ def _np_roll(a, shift, axis=None):
     roll(a, shift, axis=None):
 
     Roll array elements along a given axis.
+    
     Elements that roll beyond the last position are re-introduced at
     the first.
 
diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc
index 1609faf24897..f97de65f00f1 100644
--- a/src/operator/numpy/np_matrix_op.cc
+++ b/src/operator/numpy/np_matrix_op.cc
@@ -401,8 +401,15 @@ NNVM_REGISTER_OP(_np_roll)
 .set_attr<nnvm::FGradient>("FGradient",
   [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      const NumpyRollParam& param = nnvm::get<NumpyRollParam>(n->attrs.parsed);
+     if (!param.shift.has_value()) {
+       LOG(FATAL) << "roll missing 1 required positional argument: 'shift'.";
+     }
+     mxnet::TShape shifts(param.shift.value());
+     for (int i = 0; i < shifts.ndim(); ++i) {
+       shifts[i] = -shifts[i];
+     }
      std::ostringstream os1;
-     os1 << -param.shift;
+     os1 << dmlc::optional<mxnet::TShape>(shifts);
      std::ostringstream os2;
      os2 << param.axis;
      return MakeNonlossGradNode("_np_roll", n, ograds, {},
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index a3b223ad0d3b..c446f19ed153 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -1883,7 +1883,7 @@ def __init__(self):
             super(TestCopysign, self).__init__()
 
         def hybrid_forward(self, F, a1, a2):
-	            return F.np.copysign(a1, a2)
+            return F.np.copysign(a1, a2)
 
     def get_grad(a1, a2):
         sign = _np.logical_or(_np.logical_and(a1 < 0, a2 < 0),
@@ -2075,6 +2075,61 @@ def get_grad(UT, L, V):
                     assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol)
 
 
+@with_seed()
+@use_np
+def test_np_roll():
+    class TestRoll(HybridBlock):
+        def __init__(self, shift=None, axis=None):
+            super(TestRoll, self).__init__()
+            self._shift = shift
+            self._axis = axis
+
+        def hybrid_forward(self, F, x):
+            return F.np.roll(x, shift=self._shift, axis=self._axis)
+
+    dtypes = ['int32', 'int64', 'float16', 'float32', 'float64']
+    configs = [  # each entry is (shape, shift, axis), unpacked in the loop below
+        ((), (3,), None),
+        ((1,), (-3,), None),
+        ((20,), (-3,), None),
+        ((3,), (2,), 0),
+        ((2, 3, 4), (12,), (1,)),
+        ((2, 3, 4), (10, -10), (0, 1)),
+        ((2, 3, 4, 5), (0, 1), (-1, 2)),
+        ((2, 3, 0, 1), (0, 1), (-1, 2)),  # shape containing a zero-length dimension
+        ((2, 3, 4, 5), 10, (0, 2)),  # plain integer shift paired with a tuple of axes
+    ]
+    for dtype in dtypes:
+        for config in configs:
+            for hybridize in [False, True]:
+                shape, shift, axis = config[0], config[1], config[2]
+                x = rand_ndarray(shape=shape, dtype=dtype).as_np_ndarray()
+                net = TestRoll(shift=shift, axis=axis)
+                np_out = _np.roll(x.asnumpy(), shift=shift, axis=axis)
+                if hybridize:
+                    net.hybridize()
+                x.attach_grad()
+                with mx.autograd.record():
+                    mx_out = net(x)
+                assert mx_out.shape == np_out.shape
+                mx_out.backward()
+                assert same(mx_out.asnumpy(), np_out)
+                assert same(x.grad.shape, x.shape)
+                assert same(x.grad.asnumpy(), _np.ones(shape))
+
+                # test imperative
+                np_out = _np.roll(x.asnumpy(), shift=shift, axis=axis)
+                mx_out = np.roll(x, shift=shift, axis=axis)
+                assert same(mx_out.asnumpy(), np_out)
+
+                # test numeric gradient (floating-point dtypes only)
+                if dtype in ['float16', 'float32', 'float64']:
+                    x_sym = mx.sym.Variable("x").as_np_ndarray()
+                    mx_sym = mx.sym.np.roll(x_sym, shift=shift, axis=axis).as_nd_ndarray()
+                    check_numeric_gradient(mx_sym, [x.as_nd_ndarray()],
+                                           numeric_eps=1e-3, rtol=1e-3, atol=1e-5, dtype=dtype)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()