Add documentation for send/recv OP #7904

Status: Closed · wants to merge 2 commits · changes shown from 1 commit
Binary file added doc/design/images/send_recv.graffle
Binary file not shown.
Binary file added doc/design/images/send_recv.png
doc/design/send_recv.md (68 changes: 68 additions, 0 deletions)
# Send and Recv OP

With PaddlePaddle Fluid, node-to-node communication is done through
the send and recv OP. The send and recv OP will be automatically
added to the
[`ProgramDesc`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md#blockdesc-and-programdesc)
by the
[distributed transpiler](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/dist_refactor/distributed_architecture.md#distributed-transpiler)
when a local training `ProgramDesc` is transpiled to the distributed
training `ProgramDesc`.

> **Collaborator:** OP => operator

> **Collaborator:** "will be" is passive. Please try not to use passive voice.
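
For context, here is a minimal sketch of how the transpiler is typically invoked. The exact `DistributeTranspiler` argument names are assumptions from the Fluid distributed-training design and may differ between versions:

```python
import paddle.fluid as fluid

# After building the local training program (network, loss, optimizer),
# the distributed transpiler rewrites it, inserting the send/recv OPs.
t = fluid.DistributeTranspiler()
t.transpile(
    trainer_id=0,                                  # index of this trainer
    pservers="192.168.0.1:6174,192.168.0.2:6174",  # recv OP endpoints
    trainers=2)                                    # becomes the recv OP's Fanin

# Trainer-side program: ends with a send OP that ships gradients out
# and fetches the updated parameters back.
trainer_prog = t.get_trainer_program()

# Pserver-side program: starts with a recv OP plus the optimization
# block for the parameters sharded onto this endpoint.
pserver_prog = t.get_pserver_program("192.168.0.1:6174")
```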


## Send OP

> **Collaborator:** Send Op => The Send Operator

The send OP sends its input tensors to one or multiple recv OPs and
then fetches its output tensors from the recv OPs.

> **Contributor:**
> > The send OP sends its input tensors to one or multiple recv OPs and
>
> I think it's not only tensors but also SelectedRows; in fact, the send OP could send any kind of variable that implements the Serialize and Deserialize interfaces, but for now it only supports LoDTensor and SelectedRows.

> **Collaborator:** It seems that Send is a broadcasting operation, since as you said it could send tensors to more than one recv operator? Is a broadcasting send operator what we want?

|Input|Description|
|------|------|
|X|Input tensor to be sent|

|Output|Description|
|------|------|
|Out|Output tensor to be received from the recv OP|

> **Collaborator:** Do we need tables here?

|Attribute|Description|
|------|------|
|endpoints|Endpoints to send the variables to|
|epmap|Endpoints in the order of input variables, used for sharding|

> **Reviewer:** Would this be a list of OPs or just a list of recv OP endpoints?
>
> **Contributor (author):** It's a list of recv OP endpoints.
>
> **Reviewer:** And the recv OP endpoints are basically the parameter server endpoints, right?
>
> **Contributor (author):** Yes, they are the same.
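
To make the table concrete, the following is a hypothetical sketch of the send OP the transpiler might append to the trainer program. The variable names and endpoints are illustrative, not taken from this PR:

```python
# Illustrative only: a_grad/b_grad are gradient variables and a_param/b_param
# the corresponding parameters, already present in `program`'s global block.
pserver_endpoints = ["192.168.0.1:6174", "192.168.0.2:6174"]

program.global_block().append_op(
    type="send",
    inputs={"X": [a_grad, b_grad]},        # tensors to be sent
    outputs={"Out": [a_param, b_param]},   # updated tensors fetched back
    attrs={
        "endpoints": pserver_endpoints,    # where to send the variables
        # epmap[i] is the endpoint receiving input variable i; this is
        # how the variables are sharded across the pservers.
        "epmap": pserver_endpoints,
    })
```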


## Recv OP

The recv OP receives its input tensors, runs the optimization block, and
serves the tensors requested by the send OP.

|Input|Description|
|------|------|
|RX|Input tensor to be received|

|Attribute|Description|
|------|------|
|endpoint|IP address to listen on|
|OptimizeBlock|The block to run after receiving the tensors from the send OP|
|ParamList|Gradient-to-parameter name mapping, to find which parameters to optimize|
|GradList|Parameter-to-gradient name mapping, to find which gradient to use|
|Fanin|Number of send OPs connected to this recv OP|

> **Reviewer:** Where in this setup does the recv OP get the list of tensors as an input from the send OP? Or would we have one recv OP per parameter?
>
> **Contributor (author):** The input of the send op will be the same as the input of the recv op, if there is only one recv op. They are all set up by the transpiler.
>
> Currently there is one send op per trainer, and one recv op per pserver. We will move to one send/recv pair per parameter in the future.
>
> **Reviewer:** Thanks so much, that helps!
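
A matching hypothetical sketch for the pserver side (again illustrative; in practice the transpiler builds this program):

```python
# Illustrative only: `optimize_block` holds the optimizer ops for the
# parameters this pserver owns; "A@GRAD" follows Fluid's gradient naming.
pserver_program.global_block().append_op(
    type="recv",
    inputs={"RX": [a_grad]},               # tensors received from send OPs
    outputs={},
    attrs={
        "endpoint": "192.168.0.1:6174",    # address this recv OP listens on
        "OptimizeBlock": optimize_block,   # run after the tensors arrive
        "ParamList": ["A"],                # parameters to optimize here
        "GradList": ["A@GRAD"],            # gradients used for the update
        "Fanin": 2,                        # number of send OPs (trainers)
    })
```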


## Example


<img src="images/send_recv.png"/>

The graph above shows one iteration of the optimization process:

1. When the execution reaches the send OP, it will send `A grad` and
`B grad` using gRPC to the recv OPs on Pserver 0 and Pserver 1
respectively.

1. After receiving the tensors, the recv OP will execute the
optimization block. The optimization block on Pserver 0 will update
`A` using `A grad`; Pserver 1 will do the same for `B` using `B grad`.

1. The send OP will wait for the completion of the optimization
block. Upon completion, it will fetch the updated `A` and `B` from
the Pservers.
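
The sharding shown in the graph (`A grad` to Pserver 0, `B grad` to Pserver 1) is what the `epmap` attribute encodes. A small hypothetical helper makes the mapping explicit; real transpilers may balance by parameter size rather than round-robin:

```python
def build_epmap(var_names, endpoints):
    """Assign each variable to a pserver endpoint round-robin.

    epmap[i] is the endpoint that receives var_names[i].
    """
    return [endpoints[i % len(endpoints)] for i in range(len(var_names))]

# For the graph above: A grad -> Pserver 0, B grad -> Pserver 1.
epmap = build_epmap(["A@GRAD", "B@GRAD"],
                    ["192.168.0.1:6174", "192.168.0.2:6174"])
assert epmap == ["192.168.0.1:6174", "192.168.0.2:6174"]
```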
paddle/operators/recv_op.cc (12 changes: 6 additions, 6 deletions)

```diff
@@ -161,29 +161,29 @@ class RecvOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   RecvOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("RX", "(Tensor) Input tensor to be optimized").AsDuplicable();
+    AddInput("RX", "(Tensor) Input tensor to be received").AsDuplicable();
     AddComment(R"DOC(
 Recv operator
-This operator will recieve tensor from send_op
+This operator receives its input tensors, runs the optimize block, and serves the tensors requested by the send OP.
 )DOC");
     AddAttr<std::string>("endpoint",
                          "(string, default 127.0.0.1:6164)"
                          "IP address to listen on.")
         .SetDefault("127.0.0.1:6164")
         .AddCustomChecker([](const std::string &ip) { return !ip.empty(); });
     AddAttr<framework::BlockDesc *>(
-        kOptimizeBlock, "Serialized ProgramDesc string for recv to run.");
+        kOptimizeBlock, "The block to run after receiving the tensors from the send OP.");
     AddAttr<std::vector<std::string>>(
         "ParamList", "type list of string",
-        "grad->param name mapping to find which parameters to optimize.")
+        "gradient name to parameter name mapping to find which parameters to optimize.")
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "GradList", "type list of string",
-        "grad->param name mapping to find which parameters to optimize.")
+        "parameter name to gradient name mapping to find which gradient to use.")
         .SetDefault({});
     AddAttr<int>("Fanin", "type int",
-                 "Number of trainers in the current cluster job")
+                 "Number of send OPs connected to this recv OP")
         .SetDefault(1);
   }
 };
```
paddle/operators/send_op.cc (10 changes: 5 additions, 5 deletions)

```diff
@@ -63,21 +63,21 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker {
   SendOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) Input tensor to be sent").AsDuplicable();
-    AddOutput("Out", "(Tensor) Output tensor to be received from server")
+    AddOutput("Out", "(Tensor) Output tensor to be received from the recv OP")
         .AsDuplicable();
     AddComment(R"DOC(
 Send operator
-This operator will send tensor to recv_op at the parameter server.
+This operator sends its input tensors to one or multiple recv OPs and then fetches its output tensors from the recv OPs.
 )DOC");
     AddAttr<std::vector<std::string>>("endpoints",
                                       "(string vector, default 127.0.0.1:6164)"
-                                      "Server endpoints to send variables to.")
+                                      "Endpoints to send the variables to")
         .SetDefault({});
     AddAttr<std::vector<std::string>>("epmap",
                                       "(string vector, default 127.0.0.1:6164)"
-                                      "Server endpoints in the order of input "
-                                      "variables for mapping")
+                                      "Endpoints in the order of input "
+                                      "variables, used for sharding")
         .SetDefault({});
   }
 };
```