This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Numpy add numpy op roll #15902

Merged 2 commits on Sep 22, 2019
67 changes: 67 additions & 0 deletions python/mxnet/_numpy_op_doc.py
@@ -516,3 +516,70 @@ def _np__linalg_svd(a):
array(0.)
"""
pass


def _np_roll(a, shift, axis=None):
"""
roll(a, shift, axis=None)

Roll array elements along a given axis.

Elements that roll beyond the last position are re-introduced at
the first.

Parameters
----------
a : ndarray
Input array.
shift : int or tuple of ints
The number of places by which elements are shifted. If a tuple,
then `axis` must be a tuple of the same size, and each of the
given axes is shifted by the corresponding number. If an int
while `axis` is a tuple of ints, then the same value is used for
all given axes.
axis : int or tuple of ints, optional
Axis or axes along which elements are shifted. By default, the
array is flattened before shifting, after which the original
shape is restored.

Returns
-------
res : ndarray
Output array, with the same shape as `a`.

Notes
-----
Supports rolling over multiple dimensions simultaneously.

Examples
--------
>>> x = np.arange(10)
>>> np.roll(x, 2)
array([8., 9., 0., 1., 2., 3., 4., 5., 6., 7.])
>>> np.roll(x, -2)
array([2., 3., 4., 5., 6., 7., 8., 9., 0., 1.])

>>> x2 = np.reshape(x, (2,5))
>>> x2
array([[0., 1., 2., 3., 4.],
[5., 6., 7., 8., 9.]])
>>> np.roll(x2, 1)
array([[9., 0., 1., 2., 3.],
[4., 5., 6., 7., 8.]])
>>> np.roll(x2, -1)
array([[1., 2., 3., 4., 5.],
[6., 7., 8., 9., 0.]])
>>> np.roll(x2, 1, axis=0)
array([[5., 6., 7., 8., 9.],
[0., 1., 2., 3., 4.]])
>>> np.roll(x2, -1, axis=0)
array([[5., 6., 7., 8., 9.],
[0., 1., 2., 3., 4.]])
>>> np.roll(x2, 1, axis=1)
array([[4., 0., 1., 2., 3.],
[9., 5., 6., 7., 8.]])
>>> np.roll(x2, -1, axis=1)
array([[1., 2., 3., 4., 0.],
[6., 7., 8., 9., 5.]])
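
Rolling several axes at once, with `shift` and `axis` given as
same-length tuples (expected output under the tuple semantics
described in Parameters):

>>> np.roll(x2, (1, 1), axis=(0, 1))
array([[9., 5., 6., 7., 8.],
[4., 0., 1., 2., 3.]])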
"""
pass
157 changes: 157 additions & 0 deletions src/operator/numpy/np_matrix_op-inl.h
@@ -26,6 +26,7 @@
#define MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_

#include <vector>
#include <algorithm>
#include "../tensor/matrix_op-inl.h"
#include "../nn/concat-inl.h"

@@ -140,6 +141,162 @@ void NumpyVstackBackward(const nnvm::NodeAttrs& attrs,
});
}

struct NumpyRollParam : public dmlc::Parameter<NumpyRollParam> {
dmlc::optional<mxnet::TShape> shift;
dmlc::optional<mxnet::TShape> axis;
DMLC_DECLARE_PARAMETER(NumpyRollParam) {
DMLC_DECLARE_FIELD(shift)
.set_default(dmlc::optional<mxnet::TShape>())
.describe("The number of places by which elements are shifted. If a tuple,"
"then axis must be a tuple of the same size, and each of the given axes is shifted"
"by the corresponding number. If an int while axis is a tuple of ints, "
"then the same value is used for all given axes.");
DMLC_DECLARE_FIELD(axis)
.set_default(dmlc::optional<mxnet::TShape>())
.describe("Axis or axes along which elements are shifted. By default, the array is flattened"
"before shifting, after which the original shape is restored.");
}
};

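// Kernel for the flattened case (axis=None): out[i] = in[(i - shift) mod size],
// with `shift` already normalized into [0, size).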
template<int req>
struct RollAxisNone_forward {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data,
const int size, const int shift) {
int new_index = i - shift < 0 ? i - shift + size : i - shift;
KERNEL_ASSIGN(out_data[i], req, in_data[new_index]);
}
};

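// Gather kernel for the multi-axis case: new_index[i] is the precomputed flat
// source offset for output element i.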
template<int req>
struct RollAxis_forward {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data,
const size_t* new_index) {
KERNEL_ASSIGN(out_data[i], req, in_data[new_index[i]]);
}
};

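// Depth-first expansion of per-axis source coordinates into flat gather
// indices: new_axes[d][j] is the source coordinate along axis d for output
// coordinate j, value[ndim - 1 - d] is the row-major stride of axis d, and
// the recursion appends one flat input offset per output element, in
// row-major output order.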
inline void RollDfs(const std::vector<std::vector<size_t>>& new_axes,
const std::vector<size_t>& value,
std::vector<size_t>* new_index,
int index, int ndim, int mid) {
for (int a : new_axes[index]) {
if (index == ndim - 1) {
std::vector<size_t>& out = (*new_index);
out.push_back(mid + a);
} else {
mid += a * value[ndim - 1 - index];
RollDfs(new_axes, value, new_index, index + 1, ndim, mid);
mid -= a * value[ndim - 1 - index];
}
}
}

template<typename xpu>
void NumpyRollCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mxnet_op;
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
if (inputs[0].Size() == 0U) return;
const NumpyRollParam& param = nnvm::get<NumpyRollParam>(attrs.parsed);
const index_t ndim(inputs[0].shape_.ndim());
Stream<xpu> *s = ctx.get_stream<xpu>();
std::vector<int> shifts(ndim, 0);
index_t input_size = inputs[0].Size();
if (!param.axis.has_value()) {
int shift = param.shift.value()[0];
shift = shift % input_size;
if (shift < 0) {
shift += input_size;
}
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
Kernel<RollAxisNone_forward<req_type>, xpu>::Launch(
s, outputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
inputs[0].Size(), shift);
});
});
} else {
mxnet::TShape axes(param.axis.value());
for (int i = 0; i < axes.ndim(); ++i) {
if (axes[i] < 0) {
axes[i] += ndim;
}
}
for (int i = 0; i < axes.ndim(); ++i) {
CHECK_LT(axes[i], ndim)
<< "axis " << axes[i]
<< " exceeds input dimensions " << inputs[0].shape_;
CHECK_GE(axes[i], 0)
<< "axis " << param.axis.value()
<< " exceeds input dimensions " << inputs[0].shape_;
}
if (param.shift.value().ndim() == 1) {
for (int i = 0; i < axes.ndim(); ++i) {
shifts[axes[i]] = param.shift.value()[0];
}
} else {
if (param.shift.value().ndim() != axes.ndim()) {
LOG(FATAL) << "shift and `axis` must be a tuple of the same size.";
}
for (int i = 0; i < axes.ndim(); ++i) {
shifts[axes[i]] = param.shift.value()[i];
}
}
// normalize each shift into [0, shape[i])
for (int i = 0; i < ndim; ++i) {
int trans_shift = shifts[i] % inputs[0].shape_[i];
if (trans_shift < 0) {
trans_shift += inputs[0].shape_[i];
}
shifts[i] = trans_shift;
}
// per-axis source coordinate for every output coordinate after the shift.
std::vector<std::vector<size_t>> new_axes;
std::vector<size_t> new_index;
std::vector<size_t> temp;
std::vector<size_t> value(ndim, 0);
int mid_val = 1;
for (int i = 0; i < ndim; ++i) {
if (shifts[i] != 0) {
for (int j = 0; j < inputs[0].shape_[i]; ++j) {
int new_axis = (j + inputs[0].shape_[i] - shifts[i]) % inputs[0].shape_[i];
temp.push_back(new_axis);
}
} else {
for (int j = 0; j < inputs[0].shape_[i]; ++j) {
temp.push_back(j);
}
}
new_axes.push_back(temp);
temp.clear();
value[i] = mid_val;
mid_val *= inputs[0].shape_[ndim - 1 - i];
}
RollDfs(new_axes, value, &new_index, 0, ndim, 0);
size_t workspace_size = new_index.size() * sizeof(size_t);
Tensor<xpu, 1, char> workspace =
ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
Tensor<cpu, 1, size_t> index_cpu_tensor(new_index.data(), Shape1(new_index.size()));
Tensor<xpu, 1, size_t> index_xpu_tensor(
reinterpret_cast<size_t*>(workspace.dptr_), Shape1(new_index.size()));
mshadow::Copy(index_xpu_tensor, index_cpu_tensor, s);
MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
Kernel<RollAxis_forward<req_type>, xpu>::Launch(
s, outputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
index_xpu_tensor.dptr_);
});
});
}
}

} // namespace op
} // namespace mxnet

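For intuition, here is a minimal standalone sketch of the precompute-then-gather
strategy used above: like `RollDfs`, it expands per-axis source coordinates into
flat gather indices, then gathers. It is illustrative only, not part of the
patch; the shape, shifts, and helper names are assumed for the example.

```cpp
#include <cstdio>
#include <vector>

// Expand per-axis source coordinates into flat gather indices, depth first,
// in row-major output order (the same idea as RollDfs).
void BuildGatherIndices(const std::vector<std::vector<int>>& src_coord,
                        const std::vector<int>& stride,
                        int axis, int base, std::vector<int>* out) {
  for (int c : src_coord[axis]) {
    if (axis + 1 == static_cast<int>(src_coord.size())) {
      out->push_back(base + c);  // innermost axis has stride 1
    } else {
      BuildGatherIndices(src_coord, stride, axis + 1,
                         base + c * stride[axis], out);
    }
  }
}

int main() {
  const std::vector<int> shape = {2, 3};
  const std::vector<int> shifts = {0, 1};  // roll axis 1 by +1
  // src_coord[d][j] = input coordinate along axis d that feeds output j.
  std::vector<std::vector<int>> src_coord(shape.size());
  for (size_t d = 0; d < shape.size(); ++d) {
    for (int j = 0; j < shape[d]; ++j) {
      src_coord[d].push_back((j + shape[d] - shifts[d]) % shape[d]);
    }
  }
  const std::vector<int> stride = {3, 1};  // row-major strides for shape {2, 3}
  std::vector<int> gather;
  BuildGatherIndices(src_coord, stride, 0, 0, &gather);
  const int in[6] = {0, 1, 2, 3, 4, 5};
  for (int g : gather) std::printf("%d ", in[g]);  // prints: 2 0 1 5 3 4
  std::printf("\n");  // i.e. roll([[0,1,2],[3,4,5]], 1, axis=1) = [[2,0,1],[5,3,4]]
  return 0;
}
```

The actual operator stages these indices in temporary workspace and launches a
single gather kernel, so each output element costs one indexed read.
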
76 changes: 76 additions & 0 deletions src/operator/numpy/np_matrix_op.cc
@@ -31,6 +31,7 @@ namespace mxnet {
namespace op {

DMLC_REGISTER_PARAMETER(NumpyTransposeParam);
DMLC_REGISTER_PARAMETER(NumpyRollParam);

bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector *in_attrs,
@@ -489,5 +490,80 @@ NNVM_REGISTER_OP(_backward_np_vstack)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", NumpyVstackBackward<cpu>);

inline bool NumpyRollShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector *in_attrs,
mxnet::ShapeVector *out_attrs) {
using namespace mshadow;
const NumpyRollParam& param = nnvm::get<NumpyRollParam>(attrs.parsed);

if (!param.shift.has_value()) {
LOG(FATAL) << "roll missing 1 required positional argument: 'shift'.";
}
if (param.shift.value().ndim() > 1 &&
param.axis.has_value() &&
param.axis.value().ndim() != param.shift.value().ndim()) {
LOG(FATAL) << "shift and `axis` must be a tuple of the same size.";
}
if (!param.axis.has_value() && param.shift.has_value() && param.shift.value().ndim() > 1) {
LOG(FATAL) << "shift must be an int.";
}
if (param.axis.has_value()) {
mxnet::TShape axes(param.axis.value());
const index_t ndim = (*in_attrs)[0].ndim();
for (index_t i = 0; i < axes.ndim(); i++) {
if (axes[i] < 0) {
axes[i] += ndim;
}
}
std::sort(axes.begin(), axes.end());
for (index_t i = 1; i < axes.ndim(); i++) {
CHECK_LT(axes[i - 1], axes[i])
<< "axes have duplicates " << axes;
}
CHECK_LT(axes[axes.ndim() - 1], ndim)
<< "axis " << axes[axes.ndim() - 1]
<< " exceeds input dimensions " << (*in_attrs)[0];
CHECK_GE(axes[0], 0)
<< "axis " << param.axis.value()
<< " exceeds input dimensions " << (*in_attrs)[0];
}
return ElemwiseShape<1, 1>(attrs, in_attrs, out_attrs);
}

NNVM_REGISTER_OP(_np_roll)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr_parser(ParamParser<NumpyRollParam>)
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"data"};
})
.set_attr<mxnet::FInferShape>("FInferShape", NumpyRollShape)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
.set_attr<mxnet::FCompute>("FCompute<cpu>", NumpyRollCompute<cpu>)
.set_attr<nnvm::FGradient>("FGradient",
[](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
const NumpyRollParam& param = nnvm::get<NumpyRollParam>(n->attrs.parsed);
if (!param.shift.has_value()) {
LOG(FATAL) << "roll missing 1 required positional argument: 'shift'.";
}
mxnet::TShape shifts(param.shift.value());
for (int i = 0; i < shifts.ndim(); ++i) {
shifts[i] = -shifts[i];
}
std::ostringstream os1;
os1 << dmlc::optional<mxnet::TShape>(shifts);
std::ostringstream os2;
os2 << param.axis;
return MakeNonlossGradNode("_np_roll", n, ograds, {},
{{"shift", os1.str()}, {"axis", os2.str()}});
})
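// Temp space is requested because the forward pass stages the precomputed
// gather indices in workspace (see NumpyRollCompute).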
.set_attr<FResourceRequest>("FResourceRequest",
[](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.add_argument("data", "NDArray-or-Symbol", "Input ndarray")
.add_arguments(NumpyRollParam::__FIELDS__());

} // namespace op
} // namespace mxnet
3 changes: 3 additions & 0 deletions src/operator/numpy/np_matrix_op.cu
@@ -53,5 +53,8 @@ NNVM_REGISTER_OP(_npi_vstack)
NNVM_REGISTER_OP(_backward_np_vstack)
.set_attr<FCompute>("FCompute<gpu>", NumpyVstackBackward<gpu>);

NNVM_REGISTER_OP(_np_roll)
.set_attr<FCompute>("FCompute<gpu>", NumpyRollCompute<gpu>);

} // namespace op
} // namespace mxnet