This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Op_Diagonal [Numpy] #16989

Merged
merged 2 commits on Dec 10, 2019
54 changes: 53 additions & 1 deletion python/mxnet/_numpy_op_doc.py
@@ -1124,6 +1124,58 @@ def _np_diag(array, k=0):
pass


def _np_diagonal(a, offset=0, axis1=0, axis2=1):
"""
If a is 2-D, returns the diagonal of a with the given offset, i.e., the collection of elements of
the form a[i, i+offset]. If a has more than two dimensions, then the axes specified by axis1 and
axis2 are used to determine the 2-D sub-array whose diagonal is returned. The shape of the
resulting array can be determined by removing axis1 and axis2 and appending an index to the
right equal to the size of the resulting diagonals.

Parameters
----------
a : Symbol
    Input data from which the diagonals are taken.
offset : int, optional
    Offset of the diagonal from the main diagonal. Defaults to 0.
axis1 : int, optional
    Axis to be used as the first axis of the 2-D sub-arrays. Defaults to 0.
axis2 : int, optional
    Axis to be used as the second axis of the 2-D sub-arrays. Defaults to 1.

Returns
-------
out : Symbol
    Output result; the extracted diagonals.

Raises
------
ValueError
    If the dimension of a is less than 2.

Examples
--------
>>> a = np.arange(4).reshape(2,2)
>>> a
array([[0, 1],
[2, 3]])
>>> np.diagonal(a)
array([0, 3])
>>> np.diagonal(a, 1)
array([1])

>>> a = np.arange(8).reshape(2,2,2)
>>> a
array([[[0, 1],
[2, 3]],
[[4, 5],
[6, 7]]])
>>> np.diagonal(a, 0, 0, 1)
array([[0, 6],
[1, 7]])
"""
pass


def _np_diagflat(array, k=0):
"""
Create a two-dimensional array with the flattened input as a diagonal.
@@ -1157,4 +1209,4 @@ def _np_diagflat(array, k=0):
[0, 0, 2],
[0, 0, 0]])
"""
pass
1 change: 1 addition & 0 deletions python/mxnet/numpy_dispatch_protocol.py
@@ -94,6 +94,7 @@ def _run_with_array_ufunc_proto(*args, **kwargs):
'copy',
'cumsum',
'diag',
'diagonal',
'diagflat',
'dot',
'expand_dims',
218 changes: 218 additions & 0 deletions src/operator/numpy/np_matrix_op-inl.h
@@ -1150,6 +1150,224 @@ void NumpyDiagOpBackward(const nnvm::NodeAttrs &attrs,
in_data.Size(), param.k, s, req[0]);
}

struct NumpyDiagonalParam : public dmlc::Parameter<NumpyDiagonalParam> {
int offset;
int32_t axis1;
int32_t axis2;
DMLC_DECLARE_PARAMETER(NumpyDiagonalParam) {
DMLC_DECLARE_FIELD(offset)
.set_default(0)
.describe("Diagonal in question. The default is 0. "
"Use k>0 for diagonals above the main diagonal, "
"and k<0 for diagonals below the main diagonal. "
"If input has shape (S0 S1) k must be between -S0 and S1");
DMLC_DECLARE_FIELD(axis1)
.set_default(0)
.describe("The first axis of the sub-arrays of interest. "
"Ignored when the input is a 1-D array.");
DMLC_DECLARE_FIELD(axis2)
.set_default(1)
.describe("The second axis of the sub-arrays of interest. "
"Ignored when the input is a 1-D array.");
}
};

inline mxnet::TShape NumpyDiagonalShapeImpl(const mxnet::TShape& ishape, const int k,
const int32_t axis1, const int32_t axis2) {
int32_t x1 = CheckAxis(axis1, ishape.ndim());
int32_t x2 = CheckAxis(axis2, ishape.ndim());

CHECK_NE(x1, x2) << "axis1 and axis2 cannot refer to the same axis " << x1;

auto h = ishape[x1];
auto w = ishape[x2];
if (k > 0) {
w -= k;
} else if (k < 0) {
h += k;
}
auto s = std::min(h, w);
if (s < 0) s = 0;
if (x1 > x2) std::swap(x1, x2);

int32_t n_dim = ishape.ndim() - 1;
mxnet::TShape oshape(n_dim, -1);

// remove axis1 and axis2 and append the new axis to the end
uint32_t idx = 0;
for (int i = 0; i <= n_dim; ++i) {
if (i != x1 && i != x2) {
oshape[idx++] = ishape[i];
}
}
oshape[n_dim - 1] = s;
return oshape;
}
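
For reference, the same shape rule as a minimal Python sketch (diagonal_shape is a hypothetical helper, not part of this PR; it assumes already-validated, non-negative axes):

def diagonal_shape(ishape, k=0, axis1=0, axis2=1):
    h, w = ishape[axis1], ishape[axis2]
    if k > 0:
        w -= k                    # diagonals above the main one shrink the width
    elif k < 0:
        h += k                    # diagonals below shrink the height
    s = max(min(h, w), 0)         # diagonal length; 0 if k is out of range
    rest = [d for i, d in enumerate(ishape) if i not in (axis1, axis2)]
    return tuple(rest) + (s,)

# e.g. diagonal_shape((2, 2, 2), k=0, axis1=0, axis2=1) == (2, 2)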

inline bool NumpyDiagonalOpShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
mxnet::ShapeVector* out_attrs) {
CHECK_EQ(in_attrs->size(), 1U);
CHECK_EQ(out_attrs->size(), 1U);

const mxnet::TShape& ishape = (*in_attrs)[0];
CHECK_GE(ishape.ndim(), 2) << "Input array should be at least 2d";
if (!mxnet::ndim_is_known(ishape)) {
return false;
}

const NumpyDiagonalParam& param = nnvm::get<NumpyDiagonalParam>(attrs.parsed);
mxnet::TShape oshape = NumpyDiagonalShapeImpl(ishape, param.offset, param.axis1,
param.axis2);
if (shape_is_none(oshape)) {
LOG(FATAL) << "Diagonal does not exist.";
}
SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
return shape_is_known(out_attrs->at(0));
}

inline bool NumpyDiagonalOpType(const nnvm::NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
CHECK_EQ(in_attrs->size(), 1U);
CHECK_EQ(out_attrs->size(), 1U);

TYPE_ASSIGN_CHECK(*out_attrs, 0, (*in_attrs)[0]);
TYPE_ASSIGN_CHECK(*in_attrs, 0, (*out_attrs)[0]);
return (*out_attrs)[0] != -1;
}

template<int ndim, int req, bool back>
struct diag_n {
template<typename DType>
MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* a,
mshadow::Shape<ndim> oshape,
mshadow::Shape<ndim> ishape,
index_t stride, index_t offset,
index_t base) {
using namespace mxnet_op;
index_t idx = i / base;
index_t j = ravel(unravel(idx, oshape), ishape) + offset + stride * (i - idx * base);
if (back) {
KERNEL_ASSIGN(out[j], req, a[i]);
} else {
KERNEL_ASSIGN(out[i], req, a[j]);
}
}
};
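
In words: base is the diagonal length, so idx = i / base selects which diagonal output element i belongs to; unraveling idx in the collapsed output shape and re-raveling it in the collapsed input shape lands on the start of that diagonal, and offset + stride * pos walks along it. A minimal Python rendering of the forward map (back = false), assuming a row-major layout; diag_n_forward is a hypothetical name:

import numpy as np

def diag_n_forward(a_flat, oshape, ishape, stride, offset, base, osize):
    out = np.empty(osize, dtype=a_flat.dtype)
    for i in range(osize):
        idx = i // base                 # which diagonal (batch position)
        pos = i - idx * base            # position along that diagonal
        j = np.ravel_multi_index(np.unravel_index(idx, oshape), ishape)
        out[i] = a_flat[j + offset + stride * pos]
    return out

# 2-D (2, 2) input with k=0: obody = otrailing = 1, stride = 3, base = 2
# diag_n_forward(np.arange(4), (1, 1), (2, 2), 3, 0, 2, 2) -> array([0, 3])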

template<typename xpu, bool back>
void NumpyDiagonalOpImpl(const TBlob& in_data,
const TBlob& out_data,
const mxnet::TShape& ishape,
const mxnet::TShape& oshape,
index_t dsize,
const NumpyDiagonalParam& param,
mxnet_op::Stream<xpu> *s,
const std::vector<OpReqType>& req) {
using namespace mxnet_op;
using namespace mshadow;
uint32_t x1 = CheckAxis(param.axis1, ishape.ndim());
uint32_t x2 = CheckAxis(param.axis2, ishape.ndim());
uint32_t idim = ishape.ndim(), odim = oshape.ndim();
uint32_t minx = x1, maxx = x2;
if (minx > maxx) std::swap(minx, maxx);

index_t oleading = 1,
obody = 1,
otrailing = 1;
for (uint32_t i = 0; i < minx; ++i) {
oleading *= ishape[i];
}
for (uint32_t i = minx + 1; i < maxx; ++i) {
obody *= ishape[i];
}
for (uint32_t i = maxx + 1; i < idim; ++i) {
otrailing *= ishape[i];
}

index_t ileading = oleading,
ibody = obody * ishape[minx],
itrailing = otrailing * ishape[maxx];

index_t stride1 = itrailing * obody,
stride2 = otrailing;
// stride1 + stride2 is the stride for iterating over the diagonal

if (x1 == maxx) std::swap(stride1, stride2);
index_t offset;
int k = param.offset;
if (k > 0) {
offset = stride2 * k;
} else if (k < 0) {
offset = stride1 * -k;
} else {
offset = 0;
} // the extra index offset introduced by k

MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, {
MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, {
if (back && req[0] != kAddTo && req[0] != kNullOp) {
out_data.FlatTo1D<xpu, DType>(s) = 0;
}
if (ileading == 1) {
Kernel<diag_n<2, req_type, back>, xpu>::Launch(s, dsize, out_data.dptr<DType>(),
in_data.dptr<DType>(), Shape2(obody, otrailing), Shape2(ibody, itrailing),
stride1 + stride2, offset, oshape[odim - 1]);
} else {
Kernel<diag_n<3, req_type, back>, xpu>::Launch(s, dsize, out_data.dptr<DType>(),
in_data.dptr<DType>(), Shape3(oleading, obody, otrailing),
Shape3(ileading, ibody, itrailing), stride1 + stride2, offset, oshape[odim - 1]);
}
});
});
}
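
The decomposition above collapses the input into (leading, body, trailing) blocks around the two diagonal axes; stride1 and stride2 are then exactly the flat element strides of axis1 and axis2, so stride1 + stride2 advances one step along the diagonal. A plain-NumPy cross-check of that arithmetic against np.diagonal (a sketch assuming C-contiguous input, not the operator itself):

import numpy as np

def diagonal_via_strides(a, k=0, axis1=0, axis2=1):
    n = a.ndim
    x1, x2 = axis1 % n, axis2 % n
    strides = [int(np.prod(a.shape[i + 1:], dtype=int)) for i in range(n)]
    s1, s2 = strides[x1], strides[x2]      # flat element strides of the two axes
    h, w = a.shape[x1], a.shape[x2]
    if k > 0:
        w -= k
    elif k < 0:
        h += k
    dlen = max(min(h, w), 0)
    offset = s2 * k if k > 0 else s1 * -k  # flat index of the diagonal's start
    flat = a.ravel()
    batch_axes = [i for i in range(n) if i not in (x1, x2)]
    out_shape = tuple(a.shape[i] for i in batch_axes) + (dlen,)
    out = np.empty(out_shape, dtype=a.dtype)
    for idx in np.ndindex(*out_shape[:-1]):
        start = sum(c * strides[ax] for c, ax in zip(idx, batch_axes)) + offset
        for p in range(dlen):
            out[idx + (p,)] = flat[start + (s1 + s2) * p]
    return out

x = np.arange(24).reshape(2, 3, 4)
assert (diagonal_via_strides(x, 1, 1, 2) == np.diagonal(x, 1, 1, 2)).all()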

template<typename xpu>
void NumpyDiagonalOpForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mxnet_op;
using namespace mshadow;
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
CHECK_EQ(req.size(), 1U);
CHECK_EQ(req[0], kWriteTo);
Stream<xpu> *s = ctx.get_stream<xpu>();
const TBlob& in_data = inputs[0];
const TBlob& out_data = outputs[0];
const mxnet::TShape& ishape = inputs[0].shape_;
const mxnet::TShape& oshape = outputs[0].shape_;
const NumpyDiagonalParam& param = nnvm::get<NumpyDiagonalParam>(attrs.parsed);

NumpyDiagonalOpImpl<xpu, false>(in_data, out_data, ishape, oshape,
out_data.Size(), param, s, req);
}

template<typename xpu>
void NumpyDiagonalOpBackward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mxnet_op;
using namespace mshadow;
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);
Stream<xpu> *s = ctx.get_stream<xpu>();

const TBlob& in_data = inputs[0];
const TBlob& out_data = outputs[0];
const mxnet::TShape& ishape = inputs[0].shape_;
const mxnet::TShape& oshape = outputs[0].shape_;
const NumpyDiagonalParam& param = nnvm::get<NumpyDiagonalParam>(attrs.parsed);

NumpyDiagonalOpImpl<xpu, true>(in_data, out_data, oshape, ishape,
in_data.Size(), param, s, req);
}
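
Note that the backward pass reuses the same kernel with back = true and the shape arguments swapped: the incoming gradient is scattered back onto the selected diagonal, and everything else is zeroed (or accumulated under kAddTo). Since each diagonal element is read exactly once in the forward pass, the gradient is a pure scatter; a NumPy sketch under that assumption, with diagonal_backward a hypothetical name:

import numpy as np

def diagonal_backward(ograd, ishape, k=0, axis1=0, axis2=1):
    igrad = np.zeros(ishape, dtype=ograd.dtype)
    v = np.moveaxis(igrad, (axis1, axis2), (-2, -1))  # view into igrad
    dlen = ograd.shape[-1]
    rows = np.arange(dlen) - min(k, 0)   # selects a[i - min(k,0), i + max(k,0)]
    cols = np.arange(dlen) + max(k, 0)
    v[..., rows, cols] = ograd           # scatter the incoming gradient
    return igrad

# diagonal_backward(np.ones(2), (2, 2)) -> [[1., 0.], [0., 1.]]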

struct NumpyDiagflatParam : public dmlc::Parameter<NumpyDiagflatParam> {
int k;
DMLC_DECLARE_PARAMETER(NumpyDiagflatParam) {
23 changes: 23 additions & 0 deletions src/operator/numpy/np_matrix_op.cc
@@ -37,6 +37,7 @@ DMLC_REGISTER_PARAMETER(NumpyRot90Param);
DMLC_REGISTER_PARAMETER(NumpyReshapeParam);
DMLC_REGISTER_PARAMETER(NumpyXReshapeParam);
DMLC_REGISTER_PARAMETER(NumpyDiagParam);
DMLC_REGISTER_PARAMETER(NumpyDiagonalParam);
DMLC_REGISTER_PARAMETER(NumpyDiagflatParam);


@@ -1326,6 +1327,28 @@ NNVM_REGISTER_OP(_backward_np_diag)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", NumpyDiagOpBackward<cpu>);

NNVM_REGISTER_OP(_np_diagonal)
.set_attr_parser(ParamParser<NumpyDiagonalParam>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<nnvm::FListInputNames>("FListInputNames",
[](const NodeAttrs& attrs) {
return std::vector<std::string>{"data"};
})
.set_attr<mxnet::FInferShape>("FInferShape", NumpyDiagonalOpShape)
.set_attr<nnvm::FInferType>("FInferType", NumpyDiagonalOpType)
.set_attr<FCompute>("FCompute<cpu>", NumpyDiagonalOpForward<cpu>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_np_diagonal"})
.add_argument("data", "NDArray-or-Symbol", "Input ndarray")
.add_arguments(NumpyDiagonalParam::__FIELDS__());

NNVM_REGISTER_OP(_backward_np_diagonal)
.set_attr_parser(ParamParser<NumpyDiagonalParam>)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", NumpyDiagonalOpBackward<cpu>);

NNVM_REGISTER_OP(_np_diagflat)
.set_attr_parser(ParamParser<NumpyDiagflatParam>)
.set_num_inputs(1)
6 changes: 6 additions & 0 deletions src/operator/numpy/np_matrix_op.cu
@@ -124,6 +124,12 @@ NNVM_REGISTER_OP(_np_diag)
NNVM_REGISTER_OP(_backward_np_diag)
.set_attr<FCompute>("FCompute<gpu>", NumpyDiagOpBackward<gpu>);

NNVM_REGISTER_OP(_np_diagonal)
.set_attr<FCompute>("FCompute<gpu>", NumpyDiagonalOpForward<gpu>);

NNVM_REGISTER_OP(_backward_np_diagonal)
.set_attr<FCompute>("FCompute<gpu>", NumpyDiagonalOpBackward<gpu>);

NNVM_REGISTER_OP(_np_diagflat)
.set_attr<FCompute>("FCompute<gpu>", NumpyDiagflatOpForward<gpu>);

18 changes: 18 additions & 0 deletions tests/python/unittest/test_numpy_interoperability.py
@@ -95,6 +95,23 @@ def get_mat(n):
OpArgMngr.add_workload('diag', vals_f, k=-2)


def _add_workload_diagonal():
A = np.arange(12).reshape((3, 4))
B = np.arange(8).reshape((2, 2, 2))

OpArgMngr.add_workload('diagonal', A)
OpArgMngr.add_workload('diagonal', A, offset=0)
OpArgMngr.add_workload('diagonal', A, offset=-1)
OpArgMngr.add_workload('diagonal', A, offset=1)
OpArgMngr.add_workload('diagonal', B, offset=0)
OpArgMngr.add_workload('diagonal', B, offset=1)
OpArgMngr.add_workload('diagonal', B, offset=-1)
OpArgMngr.add_workload('diagonal', B, 0, 1, 2)
OpArgMngr.add_workload('diagonal', B, 0, 0, 1)
OpArgMngr.add_workload('diagonal', B, offset=1, axis1=0, axis2=2)
OpArgMngr.add_workload('diagonal', B, 0, 2, 1)
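
With 'diagonal' added to the dispatch list above, calling official NumPy's diagonal on an mxnet ndarray should route to the new operator via __array_function__. A usage sketch, assuming a NumPy >= 1.17 environment and that the op is exposed as mxnet.numpy.diagonal, as the doc entry above suggests:

import numpy as onp
from mxnet import np as mx_np, npx

npx.set_np()                          # enable NumPy-compatible semantics
a = mx_np.arange(8).reshape(2, 2, 2)
print(mx_np.diagonal(a, offset=0, axis1=0, axis2=1))  # direct call
print(onp.diagonal(a, 0, 0, 1))       # dispatched through the protocol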


def _add_workload_concatenate(array_pool):
OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']])
OpArgMngr.add_workload('concatenate', [array_pool['4x1'], array_pool['4x1']], axis=1)
@@ -1321,6 +1338,7 @@ def _prepare_workloads():
_add_workload_ravel()
_add_workload_unravel_index()
_add_workload_diag()
_add_workload_diagonal()
_add_workload_diagflat()
_add_workload_dot()
_add_workload_expand_dims()