[Relax][Transform] Add mode choice, NaN mode, and warning for take()

vacu9708 · vacu9708 · commit 8d2797318698 · 2025-06-15T00:45:47.000+09:00
- Add a `mode` parameter to Relax’s `take()`
- Add `nan` mode to `take()` (out-of-bounds indices produce NaN)
- Add unit tests covering all `take()` modes
- Add a warning log for `fast` mode
- Unify default modes in lower layers to `fast` for consistency with Relax
diff --git a/.lesshst b/.lesshst
@@ -0,0 +1 @@
+.less-history-file:
diff --git a/include/tvm/relax/attrs/index.h b/include/tvm/relax/attrs/index.h
@@ -32,9 +32,11 @@ namespace relax {
 /*! \brief Attributes used in take operator */
 struct TakeAttrs : public tvm::AttrsNode<TakeAttrs> {
   Optional<int64_t> axis;
+  String mode;
 
   TVM_DECLARE_ATTRS(TakeAttrs, "relax.attrs.TakeAttrs") {
     TVM_ATTR_FIELD(axis).describe("The axis over which to select values.");
+    TVM_ATTR_FIELD(mode).describe("The mode for handling out-of-bounds indices.");
   }
 };  // struct TakeAttrs
 
diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h
@@ -1032,6 +1032,16 @@ inline Tensor take(const Tensor& a, const Tensor& indices, int batch_dims,
         out_shape,
         [&](const Array<Var>& out_index) { return a(UnravelIndex(indices(out_index), a_shape)); },
         name, tag);
+  } else if (mode == "nan") {
+    return compute(
+        out_shape,
+        [&](const Array<Var>& out_index) {
+          PrimExpr idx = indices(out_index);  // NaN must replace the loaded value, not the index
+          return tvm::if_then_else(
+              idx >= 0 && idx < a_size, a(UnravelIndex(idx, a_shape)),
+              tvm::tir::make_const(a->dtype, std::numeric_limits<float>::quiet_NaN()));
+        },
+        name, tag);
   } else {  // mode == "wrap"
     return compute(
         out_shape,
@@ -1094,7 +1104,7 @@ inline Tensor sequence_mask(const Tensor& data, const Tensor& valid_length, doub
  * \return A Tensor whose op member is the take operation
  */
 inline Tensor take(const Tensor& a, Variant<Tensor, PrimExpr> indices, int batch_dims, int axis,
-                   std::string mode = "clip", std::string name = "T_take",
+                   std::string mode = "fast", std::string name = "T_take",
                    std::string tag = kInjective) {
   if (axis < 0) {
     axis += static_cast<int>(a->shape.size());
@@ -1206,6 +1216,8 @@ inline Tensor take(const Tensor& a, Variant<Tensor, PrimExpr> indices, int batch
           name, tag);
     }
   } else if (mode == "fast") {
+    LOG(WARNING) << "Fast mode segfaults when there are out-of-bounds indices. "
+                    "Make sure input indices are in bound";
     return compute(
         out_shape,
         [&](const Array<Var>& out_index) {
@@ -1224,6 +1236,29 @@ inline Tensor take(const Tensor& a, Variant<Tensor, PrimExpr> indices, int batch
           return a(real_indices);
         },
         name, tag);
+  } else if (mode == "nan") {
+    return compute(
+        out_shape,
+        [&](const Array<Var>& out_index) {
+          Array<PrimExpr> indices_position;
+          for (size_t j = axis; j < static_cast<size_t>(axis + indices_len); ++j) {
+            indices_position.push_back(out_index[j]);
+          }
+          Array<PrimExpr> real_indices;
+          for (size_t j = 0; j < static_cast<size_t>(axis); ++j) {
+            real_indices.push_back(out_index[j]);
+          }
+          PrimExpr idx = get_index(indices_position);
+          real_indices.push_back(idx);
+          for (size_t j = axis + indices_len; j < out_index.size(); ++j) {
+            real_indices.push_back(out_index[j]);
+          }
+          PrimExpr in_bounds = idx >= 0 && idx < axis_dim;
+          return tvm::if_then_else(
+              in_bounds, a(real_indices),
+              tvm::tir::make_const(a->dtype, std::numeric_limits<float>::quiet_NaN()));
+        },
+        name, tag);
   } else {  // mode == "wrap"
     return compute(
         out_shape,
diff --git a/python/tvm/relax/op/index.py b/python/tvm/relax/op/index.py
@@ -26,7 +26,7 @@
 PrimExprLike = Union[int, PrimExpr]
 
 
-def take(x: Expr, indices: Expr, axis: Optional[int] = None) -> Expr:
+def take(x: Expr, indices: Expr, axis: Optional[int] = None, mode: str = "fast") -> Expr:
     """Take elements from a tensor along an axis.
     Its semantic is mostly similar to `numpy.take`
     (https://numpy.org/doc/stable/reference/generated/numpy.take.html),
@@ -45,12 +45,20 @@ def take(x: Expr, indices: Expr, axis: Optional[int] = None) -> Expr:
         The axis over which to select values.
         If it is none, the input tensor is required to be one-dimensional.
 
+    mode : str
+        Specifies how out-of-bounds indices will behave.
+        - fast (default): out-of-bounds indices lead to seg fault (user must keep indices in-bound)
+        - nan: produce NaNs for out-of-bounds indices
+        - wrap: wrap around the indices
+        - clip: clip to the range, i.e. all indices that are too large are replaced
+          by the index that addresses the last element along that axis.
+
     Returns
     -------
     ret : relax.Expr
         The taken result.
     """
-    return _ffi_api.take(x, indices, axis)  # type: ignore
+    return _ffi_api.take(x, indices, axis, mode)  # type: ignore
 
 
 @args_converter.auto
diff --git a/python/tvm/relax/transform/legalize_ops/index.py b/python/tvm/relax/transform/legalize_ops/index.py
@@ -26,11 +26,9 @@
 
 @register_legalize("relax.take")
 def _take(bb: BlockBuilder, call: Call) -> Expr:
-    # Currently Relax `take` operator doesn't provide the mode choices and
-    # requires input indices to be in range.
-    # We use fast mode, which leads to runtime error whenever some index is
-    # out of bound.
-    return bb.call_te(topi.take, call.args[0], call.args[1], call.attrs.axis, mode="fast")
+    # Currently "fast" is the default mode, which leads to segmentation faults
+    # when there are out-of-bounds indices.
+    return bb.call_te(topi.take, call.args[0], call.args[1], call.attrs.axis, mode=call.attrs.mode)
 
 
 @register_legalize("relax.strided_slice")
diff --git a/python/tvm/topi/transform.py b/python/tvm/topi/transform.py
@@ -18,9 +18,6 @@
 """Injective transformation operators"""
 from __future__ import absolute_import as _abs
 
-from math import pi
-import numpy as np
-
 import tvm
 from tvm import te, topi
 
@@ -99,8 +96,7 @@ def _compute(*idxs):
         axis_index = 0
         for i in range(0, len(idxs)):
             if i not in real_axis:
-                dim = tvm.tir.if_then_else(a.shape[len(indices)] != 1, idxs[i], 0)
-                indices.append(dim)
+                indices.append(idxs[i])
                 axis_index += 1
         return a(*indices)
 
@@ -446,7 +442,7 @@ def split(ary, indices_or_sections, axis=0):
     return cpp.split(ary, indices_or_sections, axis)
 
 
-def take(a, indices, axis=None, batch_dims=0, mode="clip"):
+def take(a, indices, axis=None, batch_dims=0, mode="fast"):
     """Take elements from an array along an axis.
 
     Parameters
@@ -465,10 +461,13 @@ def take(a, indices, axis=None, batch_dims=0, mode="clip"):
         The number of batch dimensions. By default is 0.
 
     mode : str, optional
-        Specifies how out-of-bound indices will behave.
-        clip - clip to the range (default)
-        wrap - wrap around the indices
-        fast - no clip or wrap around (user must make sure indices are in-bound)
+        Specifies how out-of-bounds indices will behave.
+        - fast (default): out-of-bounds indices lead to seg fault (user must keep indices in-bound)
+        - nan: produce NaNs for out-of-bounds indices
+        - wrap: wrap around the indices
+        - clip: clip to the range
+        'clip' mode means that all indices that are too large are replaced
+        by the index that addresses the last element along that axis.
 
     Returns
     -------
@@ -1109,45 +1108,3 @@ def index_tensor(data, indices):
         z = topi.index_tensor(x, [row, col])             # shape (2, 3)
     """
     return topi.adv_index(data, indices)
-
-
-def hamming_window(window_size, periodic, alpha, beta, dtype):
-    """Hamming window function.
-
-    Parameters
-    ----------
-    window_size: tvm.Expr
-        The size of returned window.
-
-    periodic: tvm.Expr
-        If True, returns a window to be used as periodic function.
-        If False, return a symmetric window.
-
-    alpha: tvm.Expr
-        The co-efficient alpha.
-
-    beta: tvm.Expr
-        The co-efficient beta.
-
-    Returns
-    -------
-    ret : tvm.te.Tensor
-        The result tensor.
-    """
-    if window_size == 1:
-        return topi.const_vector(np.array([1], dtype=dtype))
-
-    periodic = topi.cast(periodic, "bool")
-
-    if periodic:
-        window_size += 1
-
-    index = topi.arange(0, window_size, dtype=dtype)
-    angular_freq = 2 * pi * index / (window_size - 1)
-    cos_values = topi.cos(angular_freq)
-    window = topi.cast(alpha - beta * cos_values, dtype=dtype)
-
-    if periodic:
-        return topi.strided_slice(window, [0], [window.shape[0] - 1])
-
-    return window
diff --git a/src/relax/op/tensor/index.cc b/src/relax/op/tensor/index.cc
@@ -39,9 +39,10 @@ namespace relax {
 /* relax.take */
 TVM_REGISTER_NODE_TYPE(TakeAttrs);
 
-Expr take(Expr x, Expr indices, Optional<int64_t> axis) {
+Expr take(Expr x, Expr indices, Optional<int64_t> axis, String mode) {
   ObjectPtr<TakeAttrs> attrs = make_object<TakeAttrs>();
   attrs->axis = std::move(axis);
+  attrs->mode = std::move(mode);
 
   static const Op& op = Op::Get("relax.take");
   return Call(op, {std::move(x), std::move(indices)}, Attrs(attrs), {});
diff --git a/src/relax/op/tensor/index.h b/src/relax/op/tensor/index.h
@@ -38,9 +38,10 @@ namespace relax {
  * It is required to be a one-dimensional tensor which has integer dtype.
  * \param axis The axis over which to select values.
  * If it is `std::nullopt`, the input tensor is required to be one-dimensional.
+ * \param mode The mode for handling out-of-bounds indices.
  * \return The taken result.
  */
-Expr take(Expr x, Expr indices, Optional<int64_t> axis);
+Expr take(Expr x, Expr indices, Optional<int64_t> axis, String mode);
 
 /*!
  * \brief Strided slice of a tensor.
diff --git a/src/relax/transform/reorder_take_after_matmul.cc b/src/relax/transform/reorder_take_after_matmul.cc
@@ -95,7 +95,7 @@ std::tuple<DFPattern, ffi::TypedFunction<Expr(Expr, Map<DFPattern, Expr>)>> Crea
       // out_table.shape = [*batch, table_size]
       auto out_table = matmul(lhs, weights, DataType::Void());
       // new_output.shape = [*batch, outfeatures]
-      auto new_output = take(out_table, indices, matmul_sinfo->ndim - 1);
+      auto new_output = take(out_table, indices, matmul_sinfo->ndim - 1, attrs->mode);
 
       return new_output;
     } else if (lhs_sinfo->ndim == 3 && weights_sinfo->ndim == 3 && indices_sinfo->ndim == 1 &&
@@ -132,7 +132,7 @@ std::tuple<DFPattern, ffi::TypedFunction<Expr(Expr, Map<DFPattern, Expr>)>> Crea
       // operations.
 
       // duplicated_output.shape = [batch1, batch2, batch1, outfeatures]
-      auto duplicated_output = take(indexed_output, indices, 2);
+      auto duplicated_output = take(indexed_output, indices, 2, attrs->mode);
       // new_output.shape = [batch1, batch2, outfeatures]
       auto new_output = einsum(Tuple({duplicated_output}), "ijik->ijk");
 
diff --git a/tests/python/relax/test_op_take.py b/tests/python/relax/test_op_take.py
@@ -154,5 +154,85 @@ def main(A: R.Tensor(["n", "n"], "float16")):
     tvm.testing.assert_allclose(tvm_output.numpy(), np_expected)
 
 
+@tvm.testing.parametrize_targets("llvm")
+def test_take_nan_mode_OOB_indices(target, dev, axis):
+    """Test R.take with mode="nan" and out-of-bounds indices.
+    This test checks that out-of-bounds indices produce NaN values in the output tensor.
+    """
+
+    @I.ir_module
+    class Module:
+        @R.function
+        def main(A: R.Tensor([3, 3], "float16")):
+            output = R.take(A, R.const([0, 1, 2, 3]), axis=axis, mode="nan")
+            return output
+
+    built = tvm.compile(Module, target=target)
+    vm = tvm.relax.VirtualMachine(built, dev)
+
+    np_input = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]], dtype="float16")
+    tvm_input = tvm.nd.array(np_input, dev)
+    tvm_output = vm["main"](tvm_input)
+    if axis == 0:
+        np_expected = np.array(
+            [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0], [np.nan, np.nan, np.nan]],
+            dtype="float16",
+        )
+    elif axis == 1:
+        np_expected = np.array(
+            [[1.0, 2.0, 3.0, np.nan], [4.0, 5.0, 6.0, np.nan], [7.0, 8.0, 9.0, np.nan]],
+            dtype="float16",
+        )
+
+    tvm.testing.assert_allclose(tvm_output.numpy(), np_expected)
+
+
+@tvm.testing.parametrize_targets("llvm")
+def test_take_wrap_mode_OOB_indices(target, dev, axis):
+    """Test R.take with mode="wrap" and out-of-bounds indices.
+    This test checks that out-of-bounds indices wrap around to the valid range.
+    """
+
+    @I.ir_module
+    class Module:
+        @R.function
+        def main(A: R.Tensor([3, 3], "float16")):
+            output = R.take(A, R.const([0, 1, 2, 3]), axis=axis, mode="wrap")
+            return output
+
+    built = tvm.compile(Module, target=target)
+    vm = tvm.relax.VirtualMachine(built, dev)
+
+    np_input = np.random.random(size=[3, 3]).astype("float16")
+    tvm_input = tvm.nd.array(np_input, dev)
+    tvm_output = vm["main"](tvm_input)
+    np_expected = np.take(np_input, [0, 1, 2, 3], axis=axis, mode="wrap")
+
+    tvm.testing.assert_allclose(tvm_output.numpy(), np_expected)
+
+
+@tvm.testing.parametrize_targets("llvm")
+def test_take_clip_mode_OOB_indices(target, dev, axis):
+    """Test R.take with mode="clip" and out-of-bounds indices.
+    This test checks that out-of-bounds indices are clipped to the valid range.
+    """
+
+    @I.ir_module
+    class Module:
+        @R.function
+        def main(A: R.Tensor([3, 3], "float16")):
+            output = R.take(A, R.const([0, 1, 2, 3]), axis=axis, mode="clip")
+            return output
+
+    built = tvm.compile(Module, target=target)
+    vm = tvm.relax.VirtualMachine(built, dev)
+    np_input = np.random.random(size=[3, 3]).astype("float16")
+    tvm_input = tvm.nd.array(np_input, dev)
+    tvm_output = vm["main"](tvm_input)
+    np_expected = np.take(np_input, [0, 1, 2, 3], axis=axis, mode="clip")
+
+    tvm.testing.assert_allclose(tvm_output.numpy(), np_expected)
+
+
 if __name__ == "__main__":
     tvm.testing.main()