PaddlePaddle · luotao1 · Jul 2, 2025 · Apr 10, 2025 · Apr 15, 2025 · Apr 15, 2025
@@ -0,0 +1,240 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/infermeta/spmd_rules/index_put.h"
+
+#include "glog/logging.h"
+#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h"
+#include "paddle/phi/core/distributed/auto_parallel/inferspmd_utils.h"
+#include "paddle/phi/core/distributed/auto_parallel/utils.h"
+#include "paddle/phi/infermeta/spmd_rules/spmd_rule_macro_define.h"
+#include "paddle/phi/infermeta/spmd_rules/utils.h"
+
+namespace phi::distributed {
+// Infers the SPMD layout (dims_mapping) of the inputs and the output of the
+// forward index_put op.
+//
+// Args:
+//   x:          tensor that is written into; its rank must be greater than or
+//               equal to indices.size().
+//   indices:    non-empty list of 1-D index tensors. The rule treats the first
+//               indices.size() dimensions of x as the indexed ones.
+//   value:      1-D tensor holding the values to write.
+//   accumulate: not consulted by this rule.
+//
+// Returns:
+//   {{x, indices, value} destination attrs, {out attr}}. The first
+//   indices.size() entries of x's dims_mapping are reset to -1 and the rest are
+//   kept; out gets the same dims_mapping as x; every index tensor and value
+//   get dims_mapping {-1}.
+//
+// Raises InvalidArgument (through PADDLE_ENFORCE_*) when indices is empty or
+// any of the rank constraints above is violated.
+SpmdInfo IndexPutInferSpmd(const DistMetaTensor& x,
+                           const std::vector<DistMetaTensor>& indices,
+                           const DistMetaTensor& value,
+                           const bool accumulate) {
+  // Step0: verify input args
+  // NOTE: the *_shape / *_dist_attr_src / *_dist_attr_dst locals keep their
+  // exact names because the LOG_SPMD_* macros used in Step4 are defined
+  // elsewhere and may refer to them by name -- TODO confirm before renaming.
+  const int indices_size = static_cast<int>(indices.size());
+  // indices[0] is dereferenced below (shape query and logging), so an empty
+  // list must be rejected before anything else touches it.
+  PADDLE_ENFORCE_GT(indices_size,
+                    0,
+                    common::errors::InvalidArgument(
+                        "The indices of index_put should not be empty, "
+                        "but got indices_size:[%d].",
+                        indices_size));
+
+  auto x_shape = common::vectorize(x.dims());
+  auto indices_shape = common::vectorize(indices[0].dims());
+  auto value_shape = common::vectorize(value.dims());
+  int x_ndim = static_cast<int>(x_shape.size());
+  int indices_ndim = static_cast<int>(indices_shape.size());
+  int value_ndim = static_cast<int>(value_shape.size());
+
+  TensorDistAttr x_dist_attr_src = x.dist_attr();
+  std::vector<TensorDistAttr> indices_dist_attrs_src;
+  indices_dist_attrs_src.reserve(indices.size());
+  std::transform(indices.begin(),
+                 indices.end(),
+                 std::back_inserter(indices_dist_attrs_src),
+                 [](const auto& meta) { return meta.dist_attr(); });
+  TensorDistAttr value_dist_attr_src = value.dist_attr();
+
+  std::vector<int64_t> x_dims_mapping = x_dist_attr_src.dims_mapping();
+
+  PADDLE_ENFORCE_GE(x_ndim,
+                    indices_size,
+                    common::errors::InvalidArgument(
+                        "The ndim of x in index_put should be "
+                        "greater than or equal to the size of indices, "
+                        "but got x_ndim:[%d],indices_size:[%d].",
+                        x_ndim,
+                        indices_size));
+
+  PADDLE_ENFORCE_EQ(value_ndim,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The ndim of value in index_put should be equal to 1, "
+                        "but got value_ndim:[%d].",
+                        value_ndim));
+  PADDLE_ENFORCE_EQ(
+      indices_ndim,
+      1,
+      common::errors::InvalidArgument(
+          "The ndim of indices in index_put should be equal to 1, "
+          "but got indices_ndim:[%d].",
+          indices_ndim));
+  for (int i = 0; i < indices_size; i++) {
+    PADDLE_ENFORCE_EQ(
+        indices[i].dims().size(),
+        1,
+        common::errors::InvalidArgument(
+            "The ndim of indices[%d] in index_put should be equal to 1, "
+            "but got indices[%d] ndim:[%d].",
+            i,
+            i,
+            indices[i].dims().size()));
+  }
+  // Step1: set dims_mapping for input
+  // x_ndim >= indices_size was enforced above, so resetting the first
+  // indices_size entries covers both the "partially indexed" and the
+  // "fully indexed" (indices_size == x_ndim) cases.
+  for (int i = 0; i < indices_size; i++) {
+    x_dims_mapping[i] = -1;
+  }
+  // Step2: set dims_mapping for output
+  TensorDistAttr out_dist_attr = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  out_dist_attr.set_dims_mapping(x_dims_mapping);
+  // Step3: update input dims mapping
+  TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_dist_attr_dst.set_dims_mapping(x_dims_mapping);
+  TensorDistAttr value_dist_attr_dst =
+      CopyTensorDistAttrForOutput(value_dist_attr_src);
+  value_dist_attr_dst.set_dims_mapping(std::vector<int64_t>{-1});
+  std::vector<TensorDistAttr> indices_dist_attrs_dst = indices_dist_attrs_src;
+  for (auto& input_attr : indices_dist_attrs_dst) {
+    input_attr.set_dims_mapping(std::vector<int64_t>{-1});
+  }
+  // Step4: Log SpmdInfo
+  LOG_SPMD_INPUT(x);
+  // LOG_SPMD_INPUT cannot be applied to the indices list, so only the first
+  // entry is logged by hand (safe: indices is known to be non-empty here).
+  VLOG(4) << "name: indices";
+  VLOG(4) << "ndim: " << std::to_string(indices_ndim) << " "
+          << "indices_size: " << std::to_string(indices_size) << " "
+          << "indices_dist_attr_src: [" << indices_dist_attrs_src[0].to_string()
+          << "] "
+          << "indices_dist_attr_dst: [" << indices_dist_attrs_dst[0].to_string()
+          << "]";
+
+  LOG_SPMD_INPUT(value);
+  LOG_SPMD_OUTPUT(out_dist_attr);
+
+  return {{x_dist_attr_dst, indices_dist_attrs_dst, value_dist_attr_dst},
+          {out_dist_attr}};
+}
+
+// Infers the SPMD layout (dims_mapping) of the inputs and the outputs of the
+// backward index_put op.
+//
+// Args:
+//   x:          forward input tensor; its rank must be greater than or equal
+//               to indices.size().
+//   indices:    non-empty list of 1-D index tensors. The rule treats the first
+//               indices.size() dimensions of x as the indexed ones.
+//   value:      1-D tensor holding the values that were written.
+//   out_grad:   gradient of the forward output; must have the same rank as x.
+//   accumulate: not consulted by this rule.
+//
+// Returns:
+//   {{x, indices, value, out_grad} destination attrs,
+//    {x_grad attr, value_grad attr}}. x, out_grad and x_grad share one
+//   dims_mapping: x's original mapping with its first indices.size() entries
+//   reset to -1. Every index tensor, value and value_grad get dims_mapping
+//   {-1}.
+//
+// Raises InvalidArgument (through PADDLE_ENFORCE_*) when indices is empty or
+// any of the rank constraints above is violated.
+SpmdInfo IndexPutGradInferSpmd(const DistMetaTensor& x,
+                               const std::vector<DistMetaTensor>& indices,
+                               const DistMetaTensor& value,
+                               const DistMetaTensor& out_grad,
+                               const bool accumulate) {
+  // Step0: verify input args
+  // NOTE: the *_shape / *_dist_attr_src / *_dist_attr_dst locals keep their
+  // exact names because the LOG_SPMD_* macros used in Step4 are defined
+  // elsewhere and may refer to them by name -- TODO confirm before renaming.
+  const int indices_size = static_cast<int>(indices.size());
+  // indices[0] is dereferenced below (shape query and logging), so an empty
+  // list must be rejected before anything else touches it.
+  PADDLE_ENFORCE_GT(indices_size,
+                    0,
+                    common::errors::InvalidArgument(
+                        "The indices of index_put_grad should not be empty, "
+                        "but got indices_size:[%d].",
+                        indices_size));
+
+  auto x_shape = common::vectorize(x.dims());
+  auto indices_shape = common::vectorize(indices[0].dims());
+  auto value_shape = common::vectorize(value.dims());
+  auto out_grad_shape = common::vectorize(out_grad.dims());
+  int x_ndim = static_cast<int>(x_shape.size());
+  int indices_ndim = static_cast<int>(indices_shape.size());
+  int value_ndim = static_cast<int>(value_shape.size());
+  int out_grad_ndim = static_cast<int>(out_grad_shape.size());
+  TensorDistAttr x_dist_attr_src = x.dist_attr();
+  std::vector<TensorDistAttr> indices_dist_attrs_src;
+  indices_dist_attrs_src.reserve(indices.size());
+  std::transform(indices.begin(),
+                 indices.end(),
+                 std::back_inserter(indices_dist_attrs_src),
+                 [](const auto& meta) { return meta.dist_attr(); });
+  TensorDistAttr value_dist_attr_src = value.dist_attr();
+  TensorDistAttr out_grad_dist_attr_src = out_grad.dist_attr();
+  std::vector<int64_t> x_dims_mapping = x_dist_attr_src.dims_mapping();
+  PADDLE_ENFORCE_EQ(
+      out_grad_ndim,
+      x_ndim,
+      common::errors::InvalidArgument(
+          "The ndim of out_grad in index_put_grad should be equal to the "
+          "ndim of x, but got out_grad_ndim:[%d],x_ndim:[%d].",
+          out_grad_ndim,
+          x_ndim));
+  PADDLE_ENFORCE_GE(x_ndim,
+                    indices_size,
+                    common::errors::InvalidArgument(
+                        "The ndim of x in index_put should be "
+                        "greater than or equal to the size of indices, "
+                        "but got x_ndim:[%d],indices_size:[%d].",
+                        x_ndim,
+                        indices_size));
+
+  PADDLE_ENFORCE_EQ(value_ndim,
+                    1,
+                    common::errors::InvalidArgument(
+                        "The ndim of value in index_put should be equal to 1, "
+                        "but got value_ndim:[%d].",
+                        value_ndim));
+  PADDLE_ENFORCE_EQ(
+      indices_ndim,
+      1,
+      common::errors::InvalidArgument(
+          "The ndim of indices in index_put should be equal to 1, "
+          "but got indices_ndim:[%d].",
+          indices_ndim));
+  for (int i = 0; i < indices_size; i++) {
+    PADDLE_ENFORCE_EQ(
+        indices[i].dims().size(),
+        1,
+        common::errors::InvalidArgument(
+            "The ndim of indices[%d] in index_put should be equal to 1, "
+            "but got indices[%d] ndim:[%d].",
+            i,
+            i,
+            indices[i].dims().size()));
+  }
+  // Step1: set x_dims_mapping
+  // x_ndim >= indices_size was enforced above, so resetting the first
+  // indices_size entries covers both the "partially indexed" and the
+  // "fully indexed" (indices_size == x_ndim) cases.
+  for (int i = 0; i < indices_size; i++) {
+    x_dims_mapping[i] = -1;
+  }
+  // Step2: set dims_mapping for output
+  TensorDistAttr x_grad_dist_attr =
+      CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_grad_dist_attr.set_dims_mapping(x_dims_mapping);
+  TensorDistAttr value_grad_dist_attr =
+      CopyTensorDistAttrForOutput(value_dist_attr_src);
+  value_grad_dist_attr.set_dims_mapping(std::vector<int64_t>{-1});
+  // Step3: update input dims mapping
+  TensorDistAttr x_dist_attr_dst = CopyTensorDistAttrForOutput(x_dist_attr_src);
+  x_dist_attr_dst.set_dims_mapping(x_dims_mapping);
+  // out_grad's destination attr is derived from x's source attr (not from
+  // out_grad's own), so it ends up identical to x_dist_attr_dst.
+  TensorDistAttr out_grad_dist_attr_dst =
+      CopyTensorDistAttrForOutput(x_dist_attr_src);
+  out_grad_dist_attr_dst.set_dims_mapping(x_dims_mapping);
+  TensorDistAttr value_dist_attr_dst =
+      CopyTensorDistAttrForOutput(value_dist_attr_src);
+  value_dist_attr_dst.set_dims_mapping(std::vector<int64_t>{-1});
+  std::vector<TensorDistAttr> indices_dist_attrs_dst = indices_dist_attrs_src;
+  for (auto& input_attr : indices_dist_attrs_dst) {
+    input_attr.set_dims_mapping(std::vector<int64_t>{-1});
+  }
+  // Step4: Log SpmdInfo
+  LOG_SPMD_INPUT(x);
+  // LOG_SPMD_INPUT cannot be applied to the indices list, so only the first
+  // entry is logged by hand (safe: indices is known to be non-empty here).
+  VLOG(4) << "name: indices";
+  VLOG(4) << "ndim: " << std::to_string(indices_ndim) << " "
+          << "indices_size: " << std::to_string(indices_size) << " "
+          << "indices_dist_attr_src: [" << indices_dist_attrs_src[0].to_string()
+          << "] "
+          << "indices_dist_attr_dst: [" << indices_dist_attrs_dst[0].to_string()
+          << "]";
+
+  LOG_SPMD_INPUT(value);
+  LOG_SPMD_INPUT(out_grad);
+  LOG_SPMD_OUTPUT(x_grad_dist_attr);
+  LOG_SPMD_OUTPUT(value_grad_dist_attr);
+
+  return {{x_dist_attr_dst,
+           indices_dist_attrs_dst,
+           value_dist_attr_dst,
+           out_grad_dist_attr_dst},
+          {x_grad_dist_attr, value_grad_dist_attr}};
+}
+
+}  // namespace phi::distributed
@@ -0,0 +1,32 @@
+/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/distributed/auto_parallel/dist_meta_tensor.h"
+#include "paddle/phi/core/distributed/type_defs.h"
+
+namespace phi::distributed {
+
+// SPMD rule for the forward index_put op.
+//
+// Takes the tensor being written (`x`), the list of index tensors
+// (`indices`), the values to write (`value`) and the op's `accumulate` flag.
+// The returned SpmdInfo carries the destination dist attrs for
+// {x, indices, value} and the dist attr for the single output.
+SpmdInfo IndexPutInferSpmd(const DistMetaTensor& x,
+                           const std::vector<DistMetaTensor>& indices,
+                           const DistMetaTensor& value,
+                           const bool accumulate = false);
+
+// SPMD rule for the backward index_put op.
+//
+// Same inputs as the forward rule plus `out_grad`, the gradient of the
+// forward output. The returned SpmdInfo carries the destination dist attrs
+// for {x, indices, value, out_grad} and the dist attrs for the two outputs
+// {x_grad, value_grad}.
+SpmdInfo IndexPutGradInferSpmd(const DistMetaTensor& x,
+                               const std::vector<DistMetaTensor>& indices,
+                               const DistMetaTensor& value,
+                               const DistMetaTensor& out_grad,
+                               const bool accumulate = false);
+
+}  // namespace phi::distributed
@@ -544,7 +544,10 @@ PD_REGISTER_SPMD_RULE(
 PD_REGISTER_SPMD_RULE(fused_rms_norm,
                       PD_INFER_SPMD(phi::distributed::RmsNormInferSpmd),
                       PD_INFER_SPMD(phi::distributed::RmsNormInferSpmdReverse));
-
+// index_put
+// Registers the SPMD rules for the op named `index_put`.
+// NOTE(review): the second slot receives the gradient rule
+// (IndexPutGradInferSpmd), whereas the neighbouring fused_rms_norm
+// registration passes a *Reverse rule (RmsNormInferSpmdReverse) in that
+// position -- confirm that the grad rule is what this slot expects.
+PD_REGISTER_SPMD_RULE(index_put,
+                      PD_INFER_SPMD(phi::distributed::IndexPutInferSpmd),
+                      PD_INFER_SPMD(phi::distributed::IndexPutGradInferSpmd));
 PD_REGISTER_SPMD_RULE(
     flash_attention,
     PD_INFER_SPMD(phi::distributed::FlashAttInferSpmdStatic),

@@ -49,6 +49,7 @@ limitations under the License. */
 #include "paddle/phi/infermeta/spmd_rules/gather_nd.h"
 #include "paddle/phi/infermeta/spmd_rules/gelu.h"
 #include "paddle/phi/infermeta/spmd_rules/group_norm.h"
+#include "paddle/phi/infermeta/spmd_rules/index_put.h"
 #include "paddle/phi/infermeta/spmd_rules/index_select.h"
 #include "paddle/phi/infermeta/spmd_rules/instance_norm.h"
 #include "paddle/phi/infermeta/spmd_rules/label_smooth.h"

diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml
@@ -1724,6 +1724,7 @@
   output : Tensor(x_grad), Tensor(value_grad)
   infer_meta :
     func : IndexPutGradInferMeta
+    spmd_rule : IndexPutGradInferSpmd
   kernel :
     func : index_put_grad
     data_type : out_grad

diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml
@@ -2825,6 +2825,7 @@
   output : Tensor(out)
   infer_meta :
     func : IndexPutInferMeta
+    spmd_rule : IndexPutInferSpmd
   kernel :
     func : index_put
     data_type : x

diff --git a/test/cpp/auto_parallel/spmd_rule_test.cc b/test/cpp/auto_parallel/spmd_rule_test.cc
@@ -230,6 +230,84 @@ TEST(MatmulSPMDRule, Ctor) {
   check_partial_dims(inferred_dist_attrs.second[0], {0});
   VLOG(4) << "test11 done." << std::endl << std::endl << std::endl;
 }
+
+TEST(IndexPut, Ctor) {
+  // Shared fixture: a 2x3 process mesh with axes "x" and "y" over ranks 0..5.
+  std::vector<int64_t> mesh_shape = {2, 3};
+  std::vector<int64_t> process_ids = {0, 1, 2, 3, 4, 5};
+  std::vector<std::string> dim_names = {"x", "y"};
+  ProcessMesh process_mesh(mesh_shape, process_ids, dim_names);
+
+  // Builds a dist attr on the shared mesh with the given dims_mapping and
+  // every dimension flagged as non-dynamic.
+  auto make_dist_attr = [&process_mesh](const std::vector<int64_t>& mapping) {
+    TensorDistAttr attr;
+    attr.set_process_mesh(process_mesh);
+    attr.set_dims_mapping(mapping);
+    attr.set_dynamic_dims(std::vector<bool>(mapping.size(), false));
+    return attr;
+  };
+
+  // Checks that every dist attr stored in a list-valued SpmdInfo slot has
+  // dims_mapping {-1}.
+  auto check_index_attrs = [](auto& arg_attr) {
+    std::vector<TensorDistAttr> attrs = paddle::get<1>(arg_attr);
+    for (auto& attr : attrs) {
+      check_dim_mapping(attr, {-1});
+    }
+  };
+
+  // Tensor shapes: a 3-D x, and 1-D index / value tensors of length 32.
+  std::vector<int64_t> x_shape = {64, 64, 64};
+  std::vector<int64_t> index_shape = {32};
+  std::vector<int64_t> value_shape = {32};
+
+  TensorDistAttr x_attr = make_dist_attr({-1, 0, 1});
+  TensorDistAttr value_attr = make_dist_attr({-1});
+  TensorDistAttr index_attr = make_dist_attr({-1});
+
+  phi::distributed::DistMetaTensor x(common::make_ddim(x_shape), x_attr);
+  phi::distributed::DistMetaTensor value(common::make_ddim(value_shape),
+                                         value_attr);
+  // Two identical index tensors, i.e. the first two dims of x are indexed.
+  std::vector<phi::distributed::DistMetaTensor> indices(
+      2,
+      phi::distributed::DistMetaTensor(common::make_ddim(index_shape),
+                                       index_attr));
+
+  // Forward:
+  //   x [-1, 0, 1], indices [[-1], [-1]], value [-1]  -->  out [-1, -1, 1]
+  phi::distributed::SpmdInfo forward_info =
+      phi::distributed::IndexPutInferSpmd(x, indices, value);
+  EXPECT_EQ(forward_info.first.size(), size_t{3});
+  EXPECT_EQ(forward_info.second.size(), size_t{1});
+  check_dim_mapping(forward_info.first[0], {-1, -1, 1});
+  check_index_attrs(forward_info.first[1]);
+  check_dim_mapping(forward_info.first[2], {-1});
+  check_dim_mapping(forward_info.second[0], {-1, -1, 1});
+  VLOG(4) << "test forward done.";
+
+  // Backward:
+  //   x [-1, 0, 1], indices [[-1], [-1]], value [-1], out_grad [-1, 0, 1]
+  //     -->  x_grad [-1, -1, 1], value_grad [-1]
+  phi::distributed::DistMetaTensor out_grad(common::make_ddim(x_shape),
+                                            x_attr);
+  phi::distributed::SpmdInfo backward_info =
+      phi::distributed::IndexPutGradInferSpmd(x, indices, value, out_grad);
+  EXPECT_EQ(backward_info.first.size(), size_t{4});
+  EXPECT_EQ(backward_info.second.size(), size_t{2});
+  check_dim_mapping(backward_info.first[0], {-1, -1, 1});
+  check_index_attrs(backward_info.first[1]);
+  check_dim_mapping(backward_info.first[2], {-1});
+  check_dim_mapping(backward_info.first[3], {-1, -1, 1});
+  check_dim_mapping(backward_info.second[0], {-1, -1, 1});
+  check_dim_mapping(backward_info.second[1], {-1});
+  VLOG(4) << "test backward done.";
+}
 TEST(InstanceNorm, Ctor) {
   // build input data class
   std::vector<int64_t> x_shape = {64, 64, 64, 64};  // N,C,H,W