From 76bcdb280e254d682be6fc6f85588f1940bb1ade Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Mon, 31 Jul 2023 21:42:30 +0800
Subject: [PATCH 01/11] [Doctest]fix No.21, test=docs_preview

---
 python/paddle/autograd/py_layer.py | 381 +++++++++++++++--------------
 1 file changed, 192 insertions(+), 189 deletions(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index d2dd31f08dcac..4a3eba71b1d84 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -33,23 +33,23 @@ class PyLayerContext:
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.autograd import PyLayer
-
-            class cus_tanh(PyLayer):
-                @staticmethod
-                def forward(ctx, x):
-                    # ctx is a object of PyLayerContext.
-                    y = paddle.tanh(x)
-                    ctx.save_for_backward(y)
-                    return y
-
-                @staticmethod
-                def backward(ctx, dy):
-                    # ctx is a object of PyLayerContext.
-                    y, = ctx.saved_tensor()
-                    grad = dy * (1 - paddle.square(y))
-                    return grad
+            >>> import paddle
+            >>> from paddle.autograd import PyLayer
+
+            >>> class cus_tanh(PyLayer):
+            ...     @staticmethod
+            ...     def forward(ctx, x):
+            ...         # ctx is a object of PyLayerContext.
+            ...         y = paddle.tanh(x)
+            ...         ctx.save_for_backward(y)
+            ...         return y
+            ...
+            ...     @staticmethod
+            ...     def backward(ctx, dy):
+            ...         # ctx is a object of PyLayerContext.
+            ...         y, = ctx.saved_tensor()
+            ...         grad = dy * (1 - paddle.square(y))
+            ...         return grad
     """
 
     def save_for_backward(self, *tensors):
@@ -68,24 +68,24 @@ def save_for_backward(self, *tensors):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        # ctx is a context object that store some objects for backward.
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         # ctx is a context object that store some objects for backward.
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
 
         """
         self.container = tensors
@@ -101,24 +101,24 @@ def saved_tensor(self):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        # ctx is a context object that store some objects for backward.
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         # ctx is a context object that store some objects for backward.
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
         return self.container
 
@@ -135,30 +135,31 @@ def mark_not_inplace(self, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                class Exp(paddle.autograd.PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        ctx.mark_not_inplace(x)
-                        return x
-
-                    @staticmethod
-                    def backward(ctx, grad_output):
-                        out = grad_output.exp()
-                        return out
-
-                x = paddle.randn((1, 1))
-                x.stop_gradient = False
-                attn_layers = []
-                for idx in range(0, 2):
-                    attn_layers.append(Exp())
-
-                for step in range(0, 2):
-                    a = x
-                    for j in range(0,2):
-                        a = attn_layers[j].apply(x)
-                    a.backward()
+                >>> import paddle
+
+                >>> class Exp(paddle.autograd.PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         ctx.mark_not_inplace(x)
+                ...         return x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad_output):
+                ...         out = grad_output.exp()
+                ...         return out
+
+                >>> paddle.seed(2023)
+                >>> x = paddle.randn((1, 1))
+                >>> x.stop_gradient = False
+                >>> attn_layers = []
+                >>> for idx in range(0, 2):
+                ...     attn_layers.append(Exp())
+
+                >>> for step in range(0, 2):
+                ...     a = x
+                ...     for j in range(0,2):
+                ...         a = attn_layers[j].apply(x)
+                ...     a.backward()
         """
         self.not_inplace_tensors = args
 
@@ -177,28 +178,28 @@ def mark_non_differentiable(self, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-                import numpy as np
-
-                class Tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        a = x + x
-                        b = x + x + x
-                        ctx.mark_non_differentiable(a)
-                        return a, b
-
-                    @staticmethod
-                    def backward(ctx, grad_a, grad_b):
-                        assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
-                        assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
-                        return grad_b
-
-                x = paddle.ones([1], dtype="float64")
-                x.stop_gradient = False
-                a, b = Tanh.apply(x)
-                b.sum().backward()
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+                >>> import numpy as np
+
+                >>> class Tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         a = x + x
+                ...         b = x + x + x
+                ...         ctx.mark_non_differentiable(a)
+                ...         return a, b
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad_a, grad_b):
+                ...         assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
+                ...         assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
+                ...         return grad_b
+
+                >>> x = paddle.ones([1], dtype="float64")
+                >>> x.stop_gradient = False
+                >>> a, b = Tanh.apply(x)
+                >>> b.sum().backward()
         """
         self.non_differentiable = args
 
@@ -216,38 +217,39 @@ def set_materialize_grads(self, value: bool):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-                import numpy as np
-
-                class Tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        return x+x+x, x+x
-
-                    @staticmethod
-                    def backward(ctx, grad, grad2):
-                        assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
-                        return grad
-
-                class Tanh2(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        ctx.set_materialize_grads(False)
-                        return x+x+x, x+x
-
-                    @staticmethod
-                    def backward(ctx, grad, grad2):
-                        assert grad2==None
-                        return grad
-
-                x = paddle.ones([1], dtype="float64")
-                x.stop_gradient = False
-                Tanh.apply(x)[0].backward()
-
-                x2 = paddle.ones([1], dtype="float64")
-                x2.stop_gradient = False
-                Tanh2.apply(x2)[0].backward()
+                # doctest: +SKIP('')
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+                >>> import numpy as np
+
+                >>> class Tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         return x+x+x, x+x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad, grad2):
+                ...         assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
+                ...         return grad
+
+                >>> class Tanh2(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         ctx.set_materialize_grads(False)
+                ...         return x+x+x, x+x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad, grad2):
+                ...         assert grad2==None
+                ...         return grad
+
+                >>> x = paddle.ones([1], dtype="float64")
+                >>> x.stop_gradient = False
+                >>> Tanh.apply(x)[0].backward()
+
+                >>> x2 = paddle.ones([1], dtype="float64")
+                >>> x2.stop_gradient = False
+                >>> Tanh2.apply(x2)[0].backward()
         """
         self.materialize_grads = value
 
@@ -290,30 +292,31 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.autograd import PyLayer
-
-            class cus_tanh(PyLayer):
-                @staticmethod
-                def forward(ctx, x):
-                    y = paddle.tanh(x)
-                    # Pass tensors to backward.
-                    ctx.save_for_backward(y)
-                    return y
-
-                @staticmethod
-                def backward(ctx, dy):
-                    # Get the tensors passed by forward.
-                    y, = ctx.saved_tensor()
-                    grad = dy * (1 - paddle.square(y))
-                    return grad
-
-            data = paddle.randn([2, 3], dtype="float64")
-            data.stop_gradient = False
-            z = cus_tanh.apply(data)
-            z.mean().backward()
-
-            print(data.grad)
+            >>> import paddle
+            >>> from paddle.autograd import PyLayer
+
+            >>> class cus_tanh(PyLayer):
+            ...     @staticmethod
+            ...     def forward(ctx, x):
+            ...         y = paddle.tanh(x)
+            ...         # Pass tensors to backward.
+            ...         ctx.save_for_backward(y)
+            ...         return y
+            ...
+            ...     @staticmethod
+            ...     def backward(ctx, dy):
+            ...         # Get the tensors passed by forward.
+            ...         y, = ctx.saved_tensor()
+            ...         grad = dy * (1 - paddle.square(y))
+            ...         return grad
+
+            >>> paddle.seed(2023)
+            >>> data = paddle.randn([2, 3], dtype="float64")
+            >>> data.stop_gradient = False
+            >>> z = cus_tanh.apply(data)
+            >>> z.mean().backward()
+
+            >>> print(data.grad)
     """
 
     @staticmethod
@@ -333,23 +336,23 @@ def forward(ctx, *args, **kwargs):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
         raise NotImplementedError(
             "You must implement the forward function for PyLayer."
@@ -373,23 +376,23 @@ def backward(ctx, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
 
         raise NotImplementedError(

From 2642075588b1e4f537908939a3cf448b01217553 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Mon, 31 Jul 2023 22:21:33 +0800
Subject: [PATCH 02/11] Revert "[Doctest]fix No.21, test=docs_preview"

This reverts commit 76bcdb280e254d682be6fc6f85588f1940bb1ade.
---
 python/paddle/autograd/py_layer.py | 381 ++++++++++++++---------------
 1 file changed, 189 insertions(+), 192 deletions(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 4a3eba71b1d84..d2dd31f08dcac 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -33,23 +33,23 @@ class PyLayerContext:
     Examples:
         .. code-block:: python
 
-            >>> import paddle
-            >>> from paddle.autograd import PyLayer
-
-            >>> class cus_tanh(PyLayer):
-            ...     @staticmethod
-            ...     def forward(ctx, x):
-            ...         # ctx is a object of PyLayerContext.
-            ...         y = paddle.tanh(x)
-            ...         ctx.save_for_backward(y)
-            ...         return y
-            ...
-            ...     @staticmethod
-            ...     def backward(ctx, dy):
-            ...         # ctx is a object of PyLayerContext.
-            ...         y, = ctx.saved_tensor()
-            ...         grad = dy * (1 - paddle.square(y))
-            ...         return grad
+            import paddle
+            from paddle.autograd import PyLayer
+
+            class cus_tanh(PyLayer):
+                @staticmethod
+                def forward(ctx, x):
+                    # ctx is a object of PyLayerContext.
+                    y = paddle.tanh(x)
+                    ctx.save_for_backward(y)
+                    return y
+
+                @staticmethod
+                def backward(ctx, dy):
+                    # ctx is a object of PyLayerContext.
+                    y, = ctx.saved_tensor()
+                    grad = dy * (1 - paddle.square(y))
+                    return grad
     """
 
     def save_for_backward(self, *tensors):
@@ -68,24 +68,24 @@ def save_for_backward(self, *tensors):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-
-                >>> class cus_tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         # ctx is a context object that store some objects for backward.
-                ...         y = paddle.tanh(x)
-                ...         # Pass tensors to backward.
-                ...         ctx.save_for_backward(y)
-                ...         return y
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, dy):
-                ...         # Get the tensors passed by forward.
-                ...         y, = ctx.saved_tensor()
-                ...         grad = dy * (1 - paddle.square(y))
-                ...         return grad
+                import paddle
+                from paddle.autograd import PyLayer
+
+                class cus_tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        # ctx is a context object that store some objects for backward.
+                        y = paddle.tanh(x)
+                        # Pass tensors to backward.
+                        ctx.save_for_backward(y)
+                        return y
+
+                    @staticmethod
+                    def backward(ctx, dy):
+                        # Get the tensors passed by forward.
+                        y, = ctx.saved_tensor()
+                        grad = dy * (1 - paddle.square(y))
+                        return grad
 
         """
         self.container = tensors
@@ -101,24 +101,24 @@ def saved_tensor(self):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-
-                >>> class cus_tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         # ctx is a context object that store some objects for backward.
-                ...         y = paddle.tanh(x)
-                ...         # Pass tensors to backward.
-                ...         ctx.save_for_backward(y)
-                ...         return y
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, dy):
-                ...         # Get the tensors passed by forward.
-                ...         y, = ctx.saved_tensor()
-                ...         grad = dy * (1 - paddle.square(y))
-                ...         return grad
+                import paddle
+                from paddle.autograd import PyLayer
+
+                class cus_tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        # ctx is a context object that store some objects for backward.
+                        y = paddle.tanh(x)
+                        # Pass tensors to backward.
+                        ctx.save_for_backward(y)
+                        return y
+
+                    @staticmethod
+                    def backward(ctx, dy):
+                        # Get the tensors passed by forward.
+                        y, = ctx.saved_tensor()
+                        grad = dy * (1 - paddle.square(y))
+                        return grad
         """
         return self.container
 
@@ -135,31 +135,30 @@ def mark_not_inplace(self, *args):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-
-                >>> class Exp(paddle.autograd.PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         ctx.mark_not_inplace(x)
-                ...         return x
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, grad_output):
-                ...         out = grad_output.exp()
-                ...         return out
-
-                >>> paddle.seed(2023)
-                >>> x = paddle.randn((1, 1))
-                >>> x.stop_gradient = False
-                >>> attn_layers = []
-                >>> for idx in range(0, 2):
-                ...     attn_layers.append(Exp())
-
-                >>> for step in range(0, 2):
-                ...     a = x
-                ...     for j in range(0,2):
-                ...         a = attn_layers[j].apply(x)
-                ...     a.backward()
+                import paddle
+
+                class Exp(paddle.autograd.PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        ctx.mark_not_inplace(x)
+                        return x
+
+                    @staticmethod
+                    def backward(ctx, grad_output):
+                        out = grad_output.exp()
+                        return out
+
+                x = paddle.randn((1, 1))
+                x.stop_gradient = False
+                attn_layers = []
+                for idx in range(0, 2):
+                    attn_layers.append(Exp())
+
+                for step in range(0, 2):
+                    a = x
+                    for j in range(0,2):
+                        a = attn_layers[j].apply(x)
+                    a.backward()
         """
         self.not_inplace_tensors = args
 
@@ -178,28 +177,28 @@ def mark_non_differentiable(self, *args):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-                >>> import numpy as np
-
-                >>> class Tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         a = x + x
-                ...         b = x + x + x
-                ...         ctx.mark_non_differentiable(a)
-                ...         return a, b
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, grad_a, grad_b):
-                ...         assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
-                ...         assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
-                ...         return grad_b
-
-                >>> x = paddle.ones([1], dtype="float64")
-                >>> x.stop_gradient = False
-                >>> a, b = Tanh.apply(x)
-                >>> b.sum().backward()
+                import paddle
+                from paddle.autograd import PyLayer
+                import numpy as np
+
+                class Tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        a = x + x
+                        b = x + x + x
+                        ctx.mark_non_differentiable(a)
+                        return a, b
+
+                    @staticmethod
+                    def backward(ctx, grad_a, grad_b):
+                        assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
+                        assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
+                        return grad_b
+
+                x = paddle.ones([1], dtype="float64")
+                x.stop_gradient = False
+                a, b = Tanh.apply(x)
+                b.sum().backward()
         """
         self.non_differentiable = args
 
@@ -217,39 +216,38 @@ def set_materialize_grads(self, value: bool):
         Examples:
             .. code-block:: python
 
-                # doctest: +SKIP('')
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-                >>> import numpy as np
-
-                >>> class Tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         return x+x+x, x+x
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, grad, grad2):
-                ...         assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
-                ...         return grad
-
-                >>> class Tanh2(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         ctx.set_materialize_grads(False)
-                ...         return x+x+x, x+x
-                ...
-                ...     @staticmethod
-                ...     def backward(ctx, grad, grad2):
-                ...         assert grad2==None
-                ...         return grad
-
-                >>> x = paddle.ones([1], dtype="float64")
-                >>> x.stop_gradient = False
-                >>> Tanh.apply(x)[0].backward()
-
-                >>> x2 = paddle.ones([1], dtype="float64")
-                >>> x2.stop_gradient = False
-                >>> Tanh2.apply(x2)[0].backward()
+                import paddle
+                from paddle.autograd import PyLayer
+                import numpy as np
+
+                class Tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        return x+x+x, x+x
+
+                    @staticmethod
+                    def backward(ctx, grad, grad2):
+                        assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
+                        return grad
+
+                class Tanh2(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        ctx.set_materialize_grads(False)
+                        return x+x+x, x+x
+
+                    @staticmethod
+                    def backward(ctx, grad, grad2):
+                        assert grad2==None
+                        return grad
+
+                x = paddle.ones([1], dtype="float64")
+                x.stop_gradient = False
+                Tanh.apply(x)[0].backward()
+
+                x2 = paddle.ones([1], dtype="float64")
+                x2.stop_gradient = False
+                Tanh2.apply(x2)[0].backward()
         """
         self.materialize_grads = value
 
@@ -292,31 +290,30 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
     Examples:
         .. code-block:: python
 
-            >>> import paddle
-            >>> from paddle.autograd import PyLayer
-
-            >>> class cus_tanh(PyLayer):
-            ...     @staticmethod
-            ...     def forward(ctx, x):
-            ...         y = paddle.tanh(x)
-            ...         # Pass tensors to backward.
-            ...         ctx.save_for_backward(y)
-            ...         return y
-            ...
-            ...     @staticmethod
-            ...     def backward(ctx, dy):
-            ...         # Get the tensors passed by forward.
-            ...         y, = ctx.saved_tensor()
-            ...         grad = dy * (1 - paddle.square(y))
-            ...         return grad
-
-            >>> paddle.seed(2023)
-            >>> data = paddle.randn([2, 3], dtype="float64")
-            >>> data.stop_gradient = False
-            >>> z = cus_tanh.apply(data)
-            >>> z.mean().backward()
-
-            >>> print(data.grad)
+            import paddle
+            from paddle.autograd import PyLayer
+
+            class cus_tanh(PyLayer):
+                @staticmethod
+                def forward(ctx, x):
+                    y = paddle.tanh(x)
+                    # Pass tensors to backward.
+                    ctx.save_for_backward(y)
+                    return y
+
+                @staticmethod
+                def backward(ctx, dy):
+                    # Get the tensors passed by forward.
+                    y, = ctx.saved_tensor()
+                    grad = dy * (1 - paddle.square(y))
+                    return grad
+
+            data = paddle.randn([2, 3], dtype="float64")
+            data.stop_gradient = False
+            z = cus_tanh.apply(data)
+            z.mean().backward()
+
+            print(data.grad)
     """
 
     @staticmethod
@@ -336,23 +333,23 @@ def forward(ctx, *args, **kwargs):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-
-                >>> class cus_tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         y = paddle.tanh(x)
-                ...         # Pass tensors to backward.
-                ...         ctx.save_for_backward(y)
-                ...         return y
-
-                ...     @staticmethod
-                ...     def backward(ctx, dy):
-                ...         # Get the tensors passed by forward.
-                ...         y, = ctx.saved_tensor()
-                ...         grad = dy * (1 - paddle.square(y))
-                ...         return grad
+                import paddle
+                from paddle.autograd import PyLayer
+
+                class cus_tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        y = paddle.tanh(x)
+                        # Pass tensors to backward.
+                        ctx.save_for_backward(y)
+                        return y
+
+                    @staticmethod
+                    def backward(ctx, dy):
+                        # Get the tensors passed by forward.
+                        y, = ctx.saved_tensor()
+                        grad = dy * (1 - paddle.square(y))
+                        return grad
         """
         raise NotImplementedError(
             "You must implement the forward function for PyLayer."
@@ -376,23 +373,23 @@ def backward(ctx, *args):
         Examples:
             .. code-block:: python
 
-                >>> import paddle
-                >>> from paddle.autograd import PyLayer
-
-                >>> class cus_tanh(PyLayer):
-                ...     @staticmethod
-                ...     def forward(ctx, x):
-                ...         y = paddle.tanh(x)
-                ...         # Pass tensors to backward.
-                ...         ctx.save_for_backward(y)
-                ...         return y
-
-                ...     @staticmethod
-                ...     def backward(ctx, dy):
-                ...         # Get the tensors passed by forward.
-                ...         y, = ctx.saved_tensor()
-                ...         grad = dy * (1 - paddle.square(y))
-                ...         return grad
+                import paddle
+                from paddle.autograd import PyLayer
+
+                class cus_tanh(PyLayer):
+                    @staticmethod
+                    def forward(ctx, x):
+                        y = paddle.tanh(x)
+                        # Pass tensors to backward.
+                        ctx.save_for_backward(y)
+                        return y
+
+                    @staticmethod
+                    def backward(ctx, dy):
+                        # Get the tensors passed by forward.
+                        y, = ctx.saved_tensor()
+                        grad = dy * (1 - paddle.square(y))
+                        return grad
         """
 
         raise NotImplementedError(

From 13cd18c115a389f0c3526ae586643d3697933920 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Mon, 31 Jul 2023 22:36:05 +0800
Subject: [PATCH 03/11] [Doctest]fix No.21, test=docs_preview

---
 python/paddle/autograd/py_layer.py | 376 +++++++++++++++--------------
 1 file changed, 189 insertions(+), 187 deletions(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index d2dd31f08dcac..123ded1aad946 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -33,23 +33,23 @@ class PyLayerContext:
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.autograd import PyLayer
-
-            class cus_tanh(PyLayer):
-                @staticmethod
-                def forward(ctx, x):
-                    # ctx is a object of PyLayerContext.
-                    y = paddle.tanh(x)
-                    ctx.save_for_backward(y)
-                    return y
-
-                @staticmethod
-                def backward(ctx, dy):
-                    # ctx is a object of PyLayerContext.
-                    y, = ctx.saved_tensor()
-                    grad = dy * (1 - paddle.square(y))
-                    return grad
+            >>> import paddle
+            >>> from paddle.autograd import PyLayer
+
+            >>> class cus_tanh(PyLayer):
+            ...     @staticmethod
+            ...     def forward(ctx, x):
+            ...         # ctx is an object of PyLayerContext.
+            ...         y = paddle.tanh(x)
+            ...         ctx.save_for_backward(y)
+            ...         return y
+            ...
+            ...     @staticmethod
+            ...     def backward(ctx, dy):
+            ...         # ctx is an object of PyLayerContext.
+            ...         y, = ctx.saved_tensor()
+            ...         grad = dy * (1 - paddle.square(y))
+            ...         return grad
     """
 
     def save_for_backward(self, *tensors):
@@ -68,24 +68,24 @@ def save_for_backward(self, *tensors):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        # ctx is a context object that store some objects for backward.
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         # ctx is a context object that stores some objects for backward.
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
 
         """
         self.container = tensors
@@ -101,24 +101,24 @@ def saved_tensor(self):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        # ctx is a context object that store some objects for backward.
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         # ctx is a context object that stores some objects for backward.
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
         return self.container
 
@@ -135,30 +135,31 @@ def mark_not_inplace(self, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-
-                class Exp(paddle.autograd.PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        ctx.mark_not_inplace(x)
-                        return x
-
-                    @staticmethod
-                    def backward(ctx, grad_output):
-                        out = grad_output.exp()
-                        return out
-
-                x = paddle.randn((1, 1))
-                x.stop_gradient = False
-                attn_layers = []
-                for idx in range(0, 2):
-                    attn_layers.append(Exp())
-
-                for step in range(0, 2):
-                    a = x
-                    for j in range(0,2):
-                        a = attn_layers[j].apply(x)
-                    a.backward()
+                >>> import paddle
+
+                >>> class Exp(paddle.autograd.PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         ctx.mark_not_inplace(x)
+                ...         return x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad_output):
+                ...         out = grad_output.exp()
+                ...         return out
+
+                >>> paddle.seed(2023)
+                >>> x = paddle.randn((1, 1))
+                >>> x.stop_gradient = False
+                >>> attn_layers = []
+                >>> for idx in range(0, 2):
+                ...     attn_layers.append(Exp())
+
+                >>> for step in range(0, 2):
+                ...     a = x
+                ...     for j in range(0,2):
+                ...         a = attn_layers[j].apply(x)
+                ...     a.backward()
         """
         self.not_inplace_tensors = args
 
@@ -177,28 +178,28 @@ def mark_non_differentiable(self, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-                import numpy as np
-
-                class Tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        a = x + x
-                        b = x + x + x
-                        ctx.mark_non_differentiable(a)
-                        return a, b
-
-                    @staticmethod
-                    def backward(ctx, grad_a, grad_b):
-                        assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
-                        assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
-                        return grad_b
-
-                x = paddle.ones([1], dtype="float64")
-                x.stop_gradient = False
-                a, b = Tanh.apply(x)
-                b.sum().backward()
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+                >>> import numpy as np
+
+                >>> class Tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         a = x + x
+                ...         b = x + x + x
+                ...         ctx.mark_non_differentiable(a)
+                ...         return a, b
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad_a, grad_b):
+                ...         assert np.equal(grad_a.numpy(), paddle.zeros([1]).numpy())
+                ...         assert np.equal(grad_b.numpy(), paddle.ones([1], dtype="float64").numpy())
+                ...         return grad_b
+
+                >>> x = paddle.ones([1], dtype="float64")
+                >>> x.stop_gradient = False
+                >>> a, b = Tanh.apply(x)
+                >>> b.sum().backward()
         """
         self.non_differentiable = args
 
@@ -216,38 +217,38 @@ def set_materialize_grads(self, value: bool):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-                import numpy as np
-
-                class Tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        return x+x+x, x+x
-
-                    @staticmethod
-                    def backward(ctx, grad, grad2):
-                        assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
-                        return grad
-
-                class Tanh2(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        ctx.set_materialize_grads(False)
-                        return x+x+x, x+x
-
-                    @staticmethod
-                    def backward(ctx, grad, grad2):
-                        assert grad2==None
-                        return grad
-
-                x = paddle.ones([1], dtype="float64")
-                x.stop_gradient = False
-                Tanh.apply(x)[0].backward()
-
-                x2 = paddle.ones([1], dtype="float64")
-                x2.stop_gradient = False
-                Tanh2.apply(x2)[0].backward()
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+                >>> import numpy as np
+
+                >>> class Tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         return x+x+x, x+x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad, grad2):
+                ...         assert np.equal(grad2.numpy(), paddle.zeros([1]).numpy())
+                ...         return grad
+
+                >>> class Tanh2(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         ctx.set_materialize_grads(False)
+                ...         return x+x+x, x+x
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, grad, grad2):
+                ...         assert grad2==None
+                ...         return grad
+
+                >>> x = paddle.ones([1], dtype="float64")
+                >>> x.stop_gradient = False
+                >>> Tanh.apply(x)[0].backward()
+
+                >>> x2 = paddle.ones([1], dtype="float64")
+                >>> x2.stop_gradient = False
+                >>> Tanh2.apply(x2)[0].backward()
         """
         self.materialize_grads = value
 
@@ -290,28 +291,29 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
     Examples:
         .. code-block:: python
 
-            import paddle
-            from paddle.autograd import PyLayer
-
-            class cus_tanh(PyLayer):
-                @staticmethod
-                def forward(ctx, x):
-                    y = paddle.tanh(x)
-                    # Pass tensors to backward.
-                    ctx.save_for_backward(y)
-                    return y
-
-                @staticmethod
-                def backward(ctx, dy):
-                    # Get the tensors passed by forward.
-                    y, = ctx.saved_tensor()
-                    grad = dy * (1 - paddle.square(y))
-                    return grad
-
-            data = paddle.randn([2, 3], dtype="float64")
-            data.stop_gradient = False
-            z = cus_tanh.apply(data)
-            z.mean().backward()
+            >>> import paddle
+            >>> from paddle.autograd import PyLayer
+
+            >>> class cus_tanh(PyLayer):
+            ...     @staticmethod
+            ...     def forward(ctx, x):
+            ...         y = paddle.tanh(x)
+            ...         # Pass tensors to backward.
+            ...         ctx.save_for_backward(y)
+            ...         return y
+            ...
+            ...     @staticmethod
+            ...     def backward(ctx, dy):
+            ...         # Get the tensors passed by forward.
+            ...         y, = ctx.saved_tensor()
+            ...         grad = dy * (1 - paddle.square(y))
+            ...         return grad
+
+            >>> paddle.seed(2023)
+            >>> data = paddle.randn([2, 3], dtype="float64")
+            >>> data.stop_gradient = False
+            >>> z = cus_tanh.apply(data)
+            >>> z.mean().backward()
 
             print(data.grad)
     """
@@ -333,23 +335,23 @@ def forward(ctx, *args, **kwargs):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
         raise NotImplementedError(
             "You must implement the forward function for PyLayer."
@@ -373,23 +375,23 @@ def backward(ctx, *args):
         Examples:
             .. code-block:: python
 
-                import paddle
-                from paddle.autograd import PyLayer
-
-                class cus_tanh(PyLayer):
-                    @staticmethod
-                    def forward(ctx, x):
-                        y = paddle.tanh(x)
-                        # Pass tensors to backward.
-                        ctx.save_for_backward(y)
-                        return y
-
-                    @staticmethod
-                    def backward(ctx, dy):
-                        # Get the tensors passed by forward.
-                        y, = ctx.saved_tensor()
-                        grad = dy * (1 - paddle.square(y))
-                        return grad
+                >>> import paddle
+                >>> from paddle.autograd import PyLayer
+
+                >>> class cus_tanh(PyLayer):
+                ...     @staticmethod
+                ...     def forward(ctx, x):
+                ...         y = paddle.tanh(x)
+                ...         # Pass tensors to backward.
+                ...         ctx.save_for_backward(y)
+                ...         return y
+                ...
+                ...     @staticmethod
+                ...     def backward(ctx, dy):
+                ...         # Get the tensors passed by forward.
+                ...         y, = ctx.saved_tensor()
+                ...         grad = dy * (1 - paddle.square(y))
+                ...         return grad
         """
 
         raise NotImplementedError(

From f97fe5e82904cc6c509ba4ef2baea9d890f4385e Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 06:08:57 +0800
Subject: [PATCH 04/11] fix bugs,test=docs_preview

---
 python/paddle/autograd/py_layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 123ded1aad946..0689aa174fc0b 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -315,7 +315,7 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
             >>> z = cus_tanh.apply(data)
             >>> z.mean().backward()
 
-            print(data.grad)
+            >>> print(data.grad)
     """
 
     @staticmethod

From 0168ff2974af754aeb3ec1138b3f142f4e99efaa Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 07:03:24 +0800
Subject: [PATCH 05/11] [Doctest]fix No.22-24,26,27, test=docs_preview

---
 python/paddle/autograd/saved_tensors_hooks.py | 104 +++++++++---------
 python/paddle/framework/dtype.py              |  52 +++++----
 python/paddle/framework/framework.py          |   8 +-
 python/paddle/framework/io_utils.py           |  22 ++--
 python/paddle/framework/random.py             |  24 ++--
 5 files changed, 112 insertions(+), 98 deletions(-)

diff --git a/python/paddle/autograd/saved_tensors_hooks.py b/python/paddle/autograd/saved_tensors_hooks.py
index d2be6b5e6bf52..709c646325ed0 100644
--- a/python/paddle/autograd/saved_tensors_hooks.py
+++ b/python/paddle/autograd/saved_tensors_hooks.py
@@ -45,58 +45,58 @@ class saved_tensors_hooks:
     Examples:
         .. code-block:: python
 
-        # Example1
-        import paddle
-
-        def pack_hook(x):
-            print("Packing", x)
-            return x.numpy()
-
-        def unpack_hook(x):
-            print("UnPacking", x)
-            return paddle.to_tensor(x)
-
-        a = paddle.ones([3,3])
-        b = paddle.ones([3,3]) * 2
-        a.stop_gradient = False
-        b.stop_gradient = False
-        with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook):
-            y = paddle.multiply(a, b)
-        y.sum().backward()
-
-        # Example2
-        import paddle
-        from paddle.autograd import PyLayer
-
-        class cus_multiply(PyLayer):
-            @staticmethod
-            def forward(ctx, a, b):
-                y = paddle.multiply(a, b)
-                ctx.save_for_backward(a, b)
-                return y
-
-            @staticmethod
-            def backward(ctx, dy):
-                a,b = ctx.saved_tensor()
-                grad_a = dy * a
-                grad_b = dy * b
-                return grad_a, grad_b
-
-        def pack_hook(x):
-            print("Packing", x)
-            return x.numpy()
-
-        def unpack_hook(x):
-            print("UnPacking", x)
-            return paddle.to_tensor(x)
-
-        a = paddle.ones([3,3])
-        b = paddle.ones([3,3]) * 2
-        a.stop_gradient = False
-        b.stop_gradient = False
-        with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook):
-            y = cus_multiply.apply(a, b)
-        y.sum().backward()
+        >>> # Example1
+        >>> import paddle
+
+        >>> def pack_hook(x):
+        ...     print("Packing", x)
+        ...     return x.numpy()
+
+        >>> def unpack_hook(x):
+        ...     print("UnPacking", x)
+        ...     return paddle.to_tensor(x)
+
+        >>> a = paddle.ones([3,3])
+        >>> b = paddle.ones([3,3]) * 2
+        >>> a.stop_gradient = False
+        >>> b.stop_gradient = False
+        >>> with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook):
+        ...     y = paddle.multiply(a, b)
+        >>> y.sum().backward()
+
+        >>> # Example2
+        >>> import paddle
+        >>> from paddle.autograd import PyLayer
+
+        >>> class cus_multiply(PyLayer):
+        ...     @staticmethod
+        ...     def forward(ctx, a, b):
+        ...         y = paddle.multiply(a, b)
+        ...         ctx.save_for_backward(a, b)
+        ...         return y
+        ...
+        ...     @staticmethod
+        ...     def backward(ctx, dy):
+        ...         a,b = ctx.saved_tensor()
+        ...         grad_a = dy * a
+        ...         grad_b = dy * b
+        ...         return grad_a, grad_b
+
+        >>> def pack_hook(x):
+        ...     print("Packing", x)
+        ...     return x.numpy()
+
+        >>> def unpack_hook(x):
+        ...     print("UnPacking", x)
+        ...     return paddle.to_tensor(x)
+
+        >>> a = paddle.ones([3,3])
+        >>> b = paddle.ones([3,3]) * 2
+        >>> a.stop_gradient = False
+        >>> b.stop_gradient = False
+        >>> with paddle.autograd.saved_tensors_hooks(pack_hook, unpack_hook):
+        ...     y = cus_multiply.apply(a, b)
+        >>> y.sum().backward()
     """
 
     def __init__(self, pack_hook, unpack_hook):
diff --git a/python/paddle/framework/dtype.py b/python/paddle/framework/dtype.py
index 42cd074d88f11..6640407084785 100644
--- a/python/paddle/framework/dtype.py
+++ b/python/paddle/framework/dtype.py
@@ -58,15 +58,19 @@ def iinfo(dtype):
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            iinfo_uint8 = paddle.iinfo(paddle.uint8)
-            print(iinfo_uint8)
-            # paddle.iinfo(min=0, max=255, bits=8, dtype=uint8)
-            print(iinfo_uint8.min) # 0
-            print(iinfo_uint8.max) # 255
-            print(iinfo_uint8.bits) # 8
-            print(iinfo_uint8.dtype) # uint8
+            >>> import paddle
+
+            >>> iinfo_uint8 = paddle.iinfo(paddle.uint8)
+            >>> print(iinfo_uint8)
+            paddle.iinfo(min=0, max=255, bits=8, dtype=uint8)
+            >>> print(iinfo_uint8.min)
+            0
+            >>> print(iinfo_uint8.max)
+            255
+            >>> print(iinfo_uint8.bits)
+            8
+            >>> print(iinfo_uint8.dtype)
+            uint8
 
     """
     return core_iinfo(dtype)
@@ -98,17 +102,25 @@ def finfo(dtype):
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            finfo_float32 = paddle.finfo(paddle.float32)
-            print(finfo_float32.min) # -3.40282e+38
-            print(finfo_float32.max) # 3.40282e+38
-            print(finfo_float32.eps) # 1.19209e-07
-            print(finfo_float32.resolution) # 1e-06
-            print(finfo_float32.smallest_normal) # 1.17549e-38
-            print(finfo_float32.tiny) # 1.17549e-38
-            print(finfo_float32.bits) # 32
-            print(finfo_float32.dtype) # float32
+            >>> import paddle
+
+            >>> finfo_float32 = paddle.finfo(paddle.float32)
+            >>> print(finfo_float32.min)
+            -3.4028234663852886e+38
+            >>> print(finfo_float32.max)
+            3.4028234663852886e+38
+            >>> print(finfo_float32.eps)
+            1.1920928955078125e-07
+            >>> print(finfo_float32.resolution)
+            1e-06
+            >>> print(finfo_float32.smallest_normal)
+            1.1754943508222875e-38
+            >>> print(finfo_float32.tiny)
+            1.1754943508222875e-38
+            >>> print(finfo_float32.bits)
+            32
+            >>> print(finfo_float32.dtype)
+            float32
 
     """
     return core_finfo(dtype)
diff --git a/python/paddle/framework/framework.py b/python/paddle/framework/framework.py
index 563c4b4cd3b20..6f8c7c7555bf0 100644
--- a/python/paddle/framework/framework.py
+++ b/python/paddle/framework/framework.py
@@ -35,8 +35,8 @@ def set_default_dtype(d):
     Examples:
         .. code-block:: python
 
-            import paddle
-            paddle.set_default_dtype("float32")
+            >>> import paddle
+            >>> paddle.set_default_dtype("float32")
 
     """
     if isinstance(d, type):
@@ -76,7 +76,7 @@ def get_default_dtype():
     Examples:
         .. code-block:: python
 
-            import paddle
-            paddle.get_default_dtype()
+            >>> import paddle
+            >>> paddle.get_default_dtype()
     """
     return LayerHelperBase.get_default_dtype()
diff --git a/python/paddle/framework/io_utils.py b/python/paddle/framework/io_utils.py
index b4c78c6748a1b..cef1d661f45e3 100644
--- a/python/paddle/framework/io_utils.py
+++ b/python/paddle/framework/io_utils.py
@@ -94,12 +94,13 @@ def is_persistable(var):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.fluid as fluid
+            >>> # doctest: +SKIP('ValueError: var fc.b not in this block')
+            >>> import paddle
+            >>> import paddle.fluid as fluid
 
-            paddle.enable_static()
-            param = fluid.default_main_program().global_block().var('fc.b')
-            res = fluid.io.is_persistable(param)
+            >>> paddle.enable_static()
+            >>> param = fluid.default_main_program().global_block().var('fc.b')
+            >>> res = fluid.io.is_persistable(param)
     """
     if (
         var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH
@@ -124,12 +125,13 @@ def is_parameter(var):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.fluid as fluid
+            >>> # doctest: +SKIP('ValueError: var fc.w not in this block')
+            >>> import paddle
+            >>> import paddle.fluid as fluid
 
-            paddle.enable_static()
-            param = fluid.default_main_program().global_block().var('fc.w')
-            res = fluid.io.is_parameter(param)
+            >>> paddle.enable_static()
+            >>> param = fluid.default_main_program().global_block().var('fc.w')
+            >>> res = fluid.io.is_parameter(param)
     """
     return isinstance(var, Parameter)
 
diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py
index fff7f5eecd987..9670e79b457bd 100644
--- a/python/paddle/framework/random.py
+++ b/python/paddle/framework/random.py
@@ -34,8 +34,8 @@ def seed(seed):
     Examples:
         .. code-block:: python
 
-            import paddle
-            gen = paddle.seed(102)
+            >>> import paddle
+            >>> gen = paddle.seed(102)
 
     """
     # TODO(zhiqiu): 1. remove program.random_seed when all random-related op upgrade
@@ -75,8 +75,8 @@ def get_rng_state(device=None):
         GeneratorState:  object.
     Examples:
         .. code-block:: python
-            import paddle
-            sts = paddle.get_rng_state()
+            >>> import paddle
+            >>> sts = paddle.get_rng_state()
     """
     state_list = []
     if device is None:
@@ -129,8 +129,8 @@ def get_cuda_rng_state():
     Examples:
         .. code-block:: python
 
-            import paddle
-            sts = paddle.get_cuda_rng_state()
+            >>> import paddle
+            >>> sts = paddle.get_cuda_rng_state()
 
     """
     state_list = []
@@ -158,9 +158,9 @@ def set_rng_state(state_list, device=None):
     Examples:
         .. code-block:: python
 
-            import paddle
-            sts = paddle.get_rng_state()
-            paddle.set_rng_state(sts)
+            >>> import paddle
+            >>> sts = paddle.get_rng_state()
+            >>> paddle.set_rng_state(sts)
 
     """
     if device is None:
@@ -223,9 +223,9 @@ def set_cuda_rng_state(state_list):
     Examples:
         .. code-block:: python
 
-            import paddle
-            sts = paddle.get_cuda_rng_state()
-            paddle.set_cuda_rng_state(sts)
+            >>> import paddle
+            >>> sts = paddle.get_cuda_rng_state()
+            >>> paddle.set_cuda_rng_state(sts)
 
     """
     if core.is_compiled_with_cuda():

From be38a669f4e7541a9eb2ad685e6ac9ea47e87c1b Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 09:34:30 +0800
Subject: [PATCH 06/11] update fix

---
 python/paddle/framework/io.py            | 308 +++++++++++------------
 python/paddle/nn/initializer/Bilinear.py |  39 +--
 python/paddle/nn/initializer/assign.py   | 103 ++++----
 python/paddle/nn/initializer/constant.py |  25 +-
 4 files changed, 243 insertions(+), 232 deletions(-)

diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py
index 2c526afc98e18..cf338f6250bdc 100644
--- a/python/paddle/framework/io.py
+++ b/python/paddle/framework/io.py
@@ -677,100 +677,100 @@ def save(obj, path, protocol=4, **configs):
         .. code-block:: python
             :name: code-example-1
 
-            # example 1: dynamic graph
-            import paddle
-            emb = paddle.nn.Embedding(10, 10)
-            layer_state_dict = emb.state_dict()
-
-            # save state_dict of emb
-            paddle.save(layer_state_dict, "emb.pdparams")
-
-            scheduler = paddle.optimizer.lr.NoamDecay(
-                d_model=0.01, warmup_steps=100, verbose=True)
-            adam = paddle.optimizer.Adam(
-                learning_rate=scheduler,
-                parameters=emb.parameters())
-            opt_state_dict = adam.state_dict()
-
-            # save state_dict of optimizer
-            paddle.save(opt_state_dict, "adam.pdopt")
-            # save weight of emb
-            paddle.save(emb.weight, "emb.weight.pdtensor")
+            >>> # example 1: dynamic graph
+            >>> import paddle
+            >>> emb = paddle.nn.Embedding(10, 10)
+            >>> layer_state_dict = emb.state_dict()
+
+            >>> # save state_dict of emb
+            >>> paddle.save(layer_state_dict, "emb.pdparams")
+
+            >>> scheduler = paddle.optimizer.lr.NoamDecay(
+            ...     d_model=0.01, warmup_steps=100, verbose=True)
+            >>> adam = paddle.optimizer.Adam(
+            ...     learning_rate=scheduler,
+            ...     parameters=emb.parameters())
+            >>> opt_state_dict = adam.state_dict()
+
+            >>> # save state_dict of optimizer
+            >>> paddle.save(opt_state_dict, "adam.pdopt")
+            >>> # save weight of emb
+            >>> paddle.save(emb.weight, "emb.weight.pdtensor")
 
         .. code-block:: python
             :name: code-example-2
 
-            # example 2: Save multiple state_dict at the same time
-            import paddle
-            from paddle import nn
-            from paddle.optimizer import Adam
+            >>> # example 2: Save multiple state_dict at the same time
+            >>> import paddle
+            >>> from paddle import nn
+            >>> from paddle.optimizer import Adam
 
-            layer = paddle.nn.Linear(3, 4)
-            adam = Adam(learning_rate=0.001, parameters=layer.parameters())
-            obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100}
-            path = 'example/model.pdparams'
-            paddle.save(obj, path)
+            >>> layer = paddle.nn.Linear(3, 4)
+            >>> adam = Adam(learning_rate=0.001, parameters=layer.parameters())
+            >>> obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100}
+            >>> path = 'example/model.pdparams'
+            >>> paddle.save(obj, path)
 
         .. code-block:: python
             :name: code-example-3
 
-            # example 3: static graph
-            import paddle
-            import paddle.static as static
+            >>> # example 3: static graph
+            >>> import paddle
+            >>> import paddle.static as static
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            # create network
-            x = paddle.static.data(name="x", shape=[None, 224], dtype='float32')
-            z = paddle.static.nn.fc(x, 10)
+            >>> # create network
+            >>> x = paddle.static.data(name="x", shape=[None, 224], dtype='float32')
+            >>> z = paddle.static.nn.fc(x, 10)
 
-            place = paddle.CPUPlace()
-            exe = paddle.static.Executor(place)
-            exe.run(paddle.static.default_startup_program())
-            prog = paddle.static.default_main_program()
-            for var in prog.list_vars():
-                if list(var.shape) == [224, 10]:
-                    tensor = var.get_value()
-                    break
+            >>> place = paddle.CPUPlace()
+            >>> exe = paddle.static.Executor(place)
+            >>> exe.run(paddle.static.default_startup_program())
+            >>> prog = paddle.static.default_main_program()
+            >>> for var in prog.list_vars():
+            ...     if list(var.shape) == [224, 10]:
+            ...         tensor = var.get_value()
+            ...         break
 
-            # save/load tensor
-            path_tensor = 'temp/tensor.pdtensor'
-            paddle.save(tensor, path_tensor)
+            >>> # save/load tensor
+            >>> path_tensor = 'temp/tensor.pdtensor'
+            >>> paddle.save(tensor, path_tensor)
 
-            # save/load state_dict
-            path_state_dict = 'temp/model.pdparams'
-            paddle.save(prog.state_dict("param"), path_tensor)
+            >>> # save/load state_dict
+            >>> path_state_dict = 'temp/model.pdparams'
+            >>> paddle.save(prog.state_dict("param"), path_state_dict)
 
         .. code-block:: python
             :name: code-example-4
 
-            # example 4: save program
-            import paddle
+            >>> # example 4: save program
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            data = paddle.static.data(
-                name='x_static_save', shape=(None, 224), dtype='float32')
-            y_static = z = paddle.static.nn.fc(data, 10)
-            main_program = paddle.static.default_main_program()
-            path = "example/main_program.pdmodel"
-            paddle.save(main_program, path)
+            >>> data = paddle.static.data(
+            ...     name='x_static_save', shape=(None, 224), dtype='float32')
+            >>> y_static = z = paddle.static.nn.fc(data, 10)
+            >>> main_program = paddle.static.default_main_program()
+            >>> path = "example/main_program.pdmodel"
+            >>> paddle.save(main_program, path)
 
         .. code-block:: python
             :name: code-example-5
 
-            # example 5: save object to memory
-            from io import BytesIO
-            import paddle
-            from paddle.nn import Linear
-            paddle.disable_static()
+            >>> # example 5: save object to memory
+            >>> from io import BytesIO
+            >>> import paddle
+            >>> from paddle.nn import Linear
+            >>> paddle.disable_static()
 
-            linear = Linear(5, 10)
-            state_dict = linear.state_dict()
-            byio = BytesIO()
-            paddle.save(state_dict, byio)
-            tensor = paddle.randn([2, 3], dtype='float32')
-            paddle.save(tensor, byio)
+            >>> linear = Linear(5, 10)
+            >>> state_dict = linear.state_dict()
+            >>> byio = BytesIO()
+            >>> paddle.save(state_dict, byio)
+            >>> tensor = paddle.randn([2, 3], dtype='float32')
+            >>> paddle.save(tensor, byio)
 
     '''
     if _is_file_path(path):
@@ -938,115 +938,115 @@ def load(path, **configs):
         .. code-block:: python
             :name: code-example-1
 
-            # example 1: dynamic graph
-            import paddle
-            emb = paddle.nn.Embedding(10, 10)
-            layer_state_dict = emb.state_dict()
-
-            # save state_dict of emb
-            paddle.save(layer_state_dict, "emb.pdparams")
-
-            scheduler = paddle.optimizer.lr.NoamDecay(
-                d_model=0.01, warmup_steps=100, verbose=True)
-            adam = paddle.optimizer.Adam(
-                learning_rate=scheduler,
-                parameters=emb.parameters())
-            opt_state_dict = adam.state_dict()
-
-            # save state_dict of optimizer
-            paddle.save(opt_state_dict, "adam.pdopt")
-            # save weight of emb
-            paddle.save(emb.weight, "emb.weight.pdtensor")
-
-            # load state_dict of emb
-            load_layer_state_dict = paddle.load("emb.pdparams")
-            # load state_dict of optimizer
-            load_opt_state_dict = paddle.load("adam.pdopt")
-            # load weight of emb
-            load_weight = paddle.load("emb.weight.pdtensor")
+            >>> # example 1: dynamic graph
+            >>> import paddle
+            >>> emb = paddle.nn.Embedding(10, 10)
+            >>> layer_state_dict = emb.state_dict()
+
+            >>> # save state_dict of emb
+            >>> paddle.save(layer_state_dict, "emb.pdparams")
+
+            >>> scheduler = paddle.optimizer.lr.NoamDecay(
+            ...     d_model=0.01, warmup_steps=100, verbose=True)
+            >>> adam = paddle.optimizer.Adam(
+            ...     learning_rate=scheduler,
+            ...     parameters=emb.parameters())
+            >>> opt_state_dict = adam.state_dict()
+
+            >>> # save state_dict of optimizer
+            >>> paddle.save(opt_state_dict, "adam.pdopt")
+            >>> # save weight of emb
+            >>> paddle.save(emb.weight, "emb.weight.pdtensor")
+
+            >>> # load state_dict of emb
+            >>> load_layer_state_dict = paddle.load("emb.pdparams")
+            >>> # load state_dict of optimizer
+            >>> load_opt_state_dict = paddle.load("adam.pdopt")
+            >>> # load weight of emb
+            >>> load_weight = paddle.load("emb.weight.pdtensor")
 
         .. code-block:: python
             :name: code-example-2
 
-            # example 2: Load multiple state_dict at the same time
-            import paddle
-            from paddle import nn
-            from paddle.optimizer import Adam
+            >>> # example 2: Load multiple state_dict at the same time
+            >>> import paddle
+            >>> from paddle import nn
+            >>> from paddle.optimizer import Adam
 
-            layer = paddle.nn.Linear(3, 4)
-            adam = Adam(learning_rate=0.001, parameters=layer.parameters())
-            obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100}
-            path = 'example/model.pdparams'
-            paddle.save(obj, path)
-            obj_load = paddle.load(path)
+            >>> layer = paddle.nn.Linear(3, 4)
+            >>> adam = Adam(learning_rate=0.001, parameters=layer.parameters())
+            >>> obj = {'model': layer.state_dict(), 'opt': adam.state_dict(), 'epoch': 100}
+            >>> path = 'example/model.pdparams'
+            >>> paddle.save(obj, path)
+            >>> obj_load = paddle.load(path)
 
         .. code-block:: python
             :name: code-example-3
 
-            # example 3: static graph
-            import paddle
-            import paddle.static as static
+            >>> # example 3: static graph
+            >>> import paddle
+            >>> import paddle.static as static
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            # create network
-            x = paddle.static.data(name="x", shape=[None, 224], dtype='float32')
-            z = paddle.static.nn.fc(x, 10)
+            >>> # create network
+            >>> x = paddle.static.data(name="x", shape=[None, 224], dtype='float32')
+            >>> z = paddle.static.nn.fc(x, 10)
 
-            place = paddle.CPUPlace()
-            exe = paddle.static.Executor(place)
-            exe.run(paddle.static.default_startup_program())
-            prog = paddle.static.default_main_program()
-            for var in prog.list_vars():
-                if list(var.shape) == [224, 10]:
-                    tensor = var.get_value()
-                    break
+            >>> place = paddle.CPUPlace()
+            >>> exe = paddle.static.Executor(place)
+            >>> exe.run(paddle.static.default_startup_program())
+            >>> prog = paddle.static.default_main_program()
+            >>> for var in prog.list_vars():
+            ...     if list(var.shape) == [224, 10]:
+            ...         tensor = var.get_value()
+            ...         break
 
-            # save/load tensor
-            path_tensor = 'temp/tensor.pdtensor'
-            paddle.save(tensor, path_tensor)
-            load_tensor = paddle.load(path_tensor)
+            >>> # save/load tensor
+            >>> path_tensor = 'temp/tensor.pdtensor'
+            >>> paddle.save(tensor, path_tensor)
+            >>> load_tensor = paddle.load(path_tensor)
 
-            # save/load state_dict
-            path_state_dict = 'temp/model.pdparams'
-            paddle.save(prog.state_dict("param"), path_tensor)
-            load_state_dict = paddle.load(path_tensor)
+            >>> # save/load state_dict
+            >>> path_state_dict = 'temp/model.pdparams'
+            >>> paddle.save(prog.state_dict("param"), path_state_dict)
+            >>> load_state_dict = paddle.load(path_state_dict)
 
         .. code-block:: python
             :name: code-example-4
 
-            # example 4: load program
-            import paddle
+            >>> # example 4: load program
+            >>> import paddle
 
-            paddle.enable_static()
+            >>> paddle.enable_static()
 
-            data = paddle.static.data(
-                name='x_static_save', shape=(None, 224), dtype='float32')
-            y_static = z = paddle.static.nn.fc(data, 10)
-            main_program = paddle.static.default_main_program()
-            path = "example/main_program.pdmodel"
-            paddle.save(main_program, path)
-            load_main = paddle.load(path)
-            print(load_main)
+            >>> data = paddle.static.data(
+            ...     name='x_static_save', shape=(None, 224), dtype='float32')
+            >>> y_static = z = paddle.static.nn.fc(data, 10)
+            >>> main_program = paddle.static.default_main_program()
+            >>> path = "example/main_program.pdmodel"
+            >>> paddle.save(main_program, path)
+            >>> load_main = paddle.load(path)
+            >>> print(load_main)
 
         .. code-block:: python
             :name: code-example-5
 
-            # example 5: save object to memory
-            from io import BytesIO
-            import paddle
-            from paddle.nn import Linear
-            paddle.disable_static()
-
-            linear = Linear(5, 10)
-            state_dict = linear.state_dict()
-            byio = BytesIO()
-            paddle.save(state_dict, byio)
-            tensor = paddle.randn([2, 3], dtype='float32')
-            paddle.save(tensor, byio)
-            byio.seek(0)
-            # load state_dict
-            dict_load = paddle.load(byio)
+            >>> # example 5: save object to memory
+            >>> from io import BytesIO
+            >>> import paddle
+            >>> from paddle.nn import Linear
+            >>> paddle.disable_static()
+
+            >>> linear = Linear(5, 10)
+            >>> state_dict = linear.state_dict()
+            >>> byio = BytesIO()
+            >>> paddle.save(state_dict, byio)
+            >>> tensor = paddle.randn([2, 3], dtype='float32')
+            >>> paddle.save(tensor, byio)
+            >>> byio.seek(0)
+            >>> # load state_dict
+            >>> dict_load = paddle.load(byio)
 
     '''
 
diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py
index 9782521c959fc..90f9617221e61 100644
--- a/python/paddle/nn/initializer/Bilinear.py
+++ b/python/paddle/nn/initializer/Bilinear.py
@@ -36,29 +36,30 @@ class Bilinear(Initializer):
 
         .. code-block:: python
 
-            import math
+            >>> import math
 
-            import paddle
-            import paddle.nn as nn
-            from paddle.regularizer import L2Decay
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> from paddle.regularizer import L2Decay
 
-            factor = 2
-            C = 2
-            B = 8
-            H = W = 32
-            w_attr = paddle.ParamAttr(learning_rate=0.,
+            >>> factor = 2
+            >>> C = 2
+            >>> B = 8
+            >>> H = W = 32
+            >>> w_attr = paddle.ParamAttr(learning_rate=0.,
                                       regularizer=L2Decay(0.),
                                       initializer=nn.initializer.Bilinear())
-            data = paddle.rand([B, 3, H, W], dtype='float32')
-            conv_up = nn.Conv2DTranspose(3,
-                                         out_channels=C,
-                                         kernel_size=2 * factor - factor % 2,
-                                         padding=int(
-                                             math.ceil((factor - 1) / 2.)),
-                                         stride=factor,
-                                         weight_attr=w_attr,
-                                         bias_attr=False)
-            x = conv_up(data)
+            >>> paddle.seed(2023)
+            >>> data = paddle.rand([B, 3, H, W], dtype='float32')
+            >>> conv_up = nn.Conv2DTranspose(3,
+            ...                              out_channels=C,
+            ...                              kernel_size=2 * factor - factor % 2,
+            ...                              padding=int(
+            ...                                  math.ceil((factor - 1) / 2.)),
+            ...                              stride=factor,
+            ...                              weight_attr=w_attr,
+            ...                              bias_attr=False)
+            >>> x = conv_up(data)
 
     Where, `out_channels=C` and `groups=C` means this is channel-wise transposed
     convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`,
diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py
index aaa198ec46942..b85f3e7509fa8 100644
--- a/python/paddle/nn/initializer/assign.py
+++ b/python/paddle/nn/initializer/assign.py
@@ -153,53 +153,62 @@ class Assign(NumpyArrayInitializer):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import numpy as np
-
-            # numpy array
-            data_1 = paddle.ones(shape=[1, 2], dtype='float32')
-            weight_attr_1 = paddle.framework.ParamAttr(
-                name="linear_weight_1",
-                initializer=paddle.nn.initializer.Assign(np.array([2, 2])))
-            bias_attr_1 = paddle.framework.ParamAttr(
-                name="linear_bias_1",
-                initializer=paddle.nn.initializer.Assign(np.array([2])))
-            linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1)
-            # linear_1.weight:  [2. 2.]
-            # linear_1.bias:  [2.]
-
-            res_1 = linear_1(data_1)
-            # res_1:  [6.]
-
-            # python list
-            data_2 = paddle.ones(shape=[1, 2], dtype='float32')
-            weight_attr_2 = paddle.framework.ParamAttr(
-                name="linear_weight_2",
-                initializer=paddle.nn.initializer.Assign([2, 2]))
-            bias_attr_2 = paddle.framework.ParamAttr(
-                name="linear_bias_2",
-                initializer=paddle.nn.initializer.Assign([2]))
-            linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2)
-            # linear_2.weight:  [2. 2.]
-            # linear_2.bias:  [2.]
-
-            res_2 = linear_2(data_2)
-            # res_2:  [6.]
-
-            # tensor
-            data_3 = paddle.ones(shape=[1, 2], dtype='float32')
-            weight_attr_3 = paddle.framework.ParamAttr(
-                name="linear_weight_3",
-                initializer=paddle.nn.initializer.Assign(paddle.full([2], 2)))
-            bias_attr_3 = paddle.framework.ParamAttr(
-                name="linear_bias_3",
-                initializer=paddle.nn.initializer.Assign(paddle.full([1], 2)))
-            linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3)
-            # linear_3.weight:  [2. 2.]
-            # linear_3.bias:  [2.]
-
-            res_3 = linear_3(data_3)
-            # res_3:  [6.]
+            >>> import paddle
+            >>> import numpy as np
+
+            >>> # numpy array
+            >>> data_1 = paddle.ones(shape=[1, 2], dtype='float32')
+            >>> weight_attr_1 = paddle.framework.ParamAttr(
+            ...     name="linear_weight_1",
+            ...     initializer=paddle.nn.initializer.Assign(np.array([2, 2])))
+            >>> bias_attr_1 = paddle.framework.ParamAttr(
+            ...     name="linear_bias_1",
+            ...     initializer=paddle.nn.initializer.Assign(np.array([2])))
+            >>> linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1)
+            >>> print(linear_1.weight.numpy())
+            [2. 2.]
+            >>> print(linear_1.bias.numpy())
+            [2.]
+
+            >>> res_1 = linear_1(data_1)
+            >>> print(res_1.numpy())
+            [6.]
+
+            >>> # python list
+            >>> data_2 = paddle.ones(shape=[1, 2], dtype='float32')
+            >>> weight_attr_2 = paddle.framework.ParamAttr(
+            ...     name="linear_weight_2",
+            ...     initializer=paddle.nn.initializer.Assign([2, 2]))
+            >>> bias_attr_2 = paddle.framework.ParamAttr(
+            ...     name="linear_bias_2",
+            ...     initializer=paddle.nn.initializer.Assign([2]))
+            >>> linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2)
+            >>> print(linear_2.weight.numpy())
+            [2. 2.]
+            >>> print(linear_2.bias.numpy())
+            [2.]
+
+            >>> res_2 = linear_2(data_2)
+            >>> print(res_2.numpy())
+            [6.]
+
+            >>> # tensor
+            >>> data_3 = paddle.ones(shape=[1, 2], dtype='float32')
+            >>> weight_attr_3 = paddle.framework.ParamAttr(
+            ...     name="linear_weight_3",
+            ...     initializer=paddle.nn.initializer.Assign(paddle.full([2], 2)))
+            >>> bias_attr_3 = paddle.framework.ParamAttr(
+            ...     name="linear_bias_3",
+            ...     initializer=paddle.nn.initializer.Assign(paddle.full([1], 2)))
+            >>> linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3)
+            >>> print(linear_3.weight.numpy())
+            [2. 2.]
+            >>> print(linear_3.bias.numpy())
+            [2.]
+
+            >>> res_3 = linear_3(data_3)
+            >>> print(res_3.numpy())
+            [6.]
     """
 
     def __init__(self, value, name=None):
diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py
index d58aa653cb6e6..b13f2696693a4 100644
--- a/python/paddle/nn/initializer/constant.py
+++ b/python/paddle/nn/initializer/constant.py
@@ -88,18 +88,19 @@ class Constant(ConstantInitializer):
     Examples:
         .. code-block:: python
 
-            import paddle
-            import paddle.nn as nn
-
-            data = paddle.rand([30, 10, 2], dtype='float32')
-            linear = nn.Linear(2,
-                                4,
-                                weight_attr=nn.initializer.Constant(value=2.0))
-            res = linear(data)
-            print(linear.weight)
-            # Tensor(shape=[2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
-            #        [[2., 2., 2., 2.],
-            #         [2., 2., 2., 2.]])
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> paddle.seed(2023)
+            >>> data = paddle.rand([30, 10, 2], dtype='float32')
+            >>> linear = nn.Linear(2,
+            ...                     4,
+            ...                     weight_attr=nn.initializer.Constant(value=2.0))
+            >>> res = linear(data)
+            >>> print(linear.weight)
+            Tensor(shape=[2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            [[2., 2., 2., 2.],
+             [2., 2., 2., 2.]])
 
     """
 

From a84d608aab745d4a6b416cc9a3a8fd419737aabe Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 10:03:27 +0800
Subject: [PATCH 07/11] with pre-commit, test=docs_preview

---
 python/paddle/framework/io_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/framework/io_utils.py b/python/paddle/framework/io_utils.py
index cef1d661f45e3..1c72bc2852d0c 100644
--- a/python/paddle/framework/io_utils.py
+++ b/python/paddle/framework/io_utils.py
@@ -94,7 +94,7 @@ def is_persistable(var):
     Examples:
         .. code-block:: python
 
-            >>> # doctest: +SKIP('ValueError: var fc.b not in this block')            
+            >>> # doctest: +SKIP('ValueError: var fc.b not in this block')
             >>> import paddle
             >>> import paddle.fluid as fluid
 

From f3e18e675b8c277841fd042a9bf232714fa1b8d2 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 13:57:02 +0800
Subject: [PATCH 08/11] fix seed, test=docs_preview

---
 python/paddle/framework/io.py            | 2 ++
 python/paddle/framework/random.py        | 4 ++++
 python/paddle/nn/initializer/constant.py | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py
index cf338f6250bdc..c3a07bc1660a3 100644
--- a/python/paddle/framework/io.py
+++ b/python/paddle/framework/io.py
@@ -769,6 +769,7 @@ def save(obj, path, protocol=4, **configs):
             >>> state_dict = linear.state_dict()
             >>> byio = BytesIO()
             >>> paddle.save(state_dict, byio)
+            >>> paddle.seed(2023)
             >>> tensor = paddle.randn([2, 3], dtype='float32')
             >>> paddle.save(tensor, byio)
 
@@ -1042,6 +1043,7 @@ def load(path, **configs):
             >>> state_dict = linear.state_dict()
             >>> byio = BytesIO()
             >>> paddle.save(state_dict, byio)
+            >>> paddle.seed(2023)
             >>> tensor = paddle.randn([2, 3], dtype='float32')
             >>> paddle.save(tensor, byio)
             >>> byio.seek(0)
diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py
index 9670e79b457bd..87862ac147992 100644
--- a/python/paddle/framework/random.py
+++ b/python/paddle/framework/random.py
@@ -76,6 +76,7 @@ def get_rng_state(device=None):
     Examples:
         .. code-block:: python
             >>> import paddle
+            >>> paddle.seed(2023)
             >>> sts = paddle.get_rng_state()
     """
     state_list = []
@@ -130,6 +131,7 @@ def get_cuda_rng_state():
         .. code-block:: python
 
             >>> import paddle
+            >>> paddle.seed(2023)
             >>> sts = paddle.get_cuda_rng_state()
 
     """
@@ -159,6 +161,7 @@ def set_rng_state(state_list, device=None):
         .. code-block:: python
 
             >>> import paddle
+            >>> paddle.seed(2023)
             >>> sts = paddle.get_rng_state()
             >>> paddle.set_rng_state(sts)
 
@@ -224,6 +227,7 @@ def set_cuda_rng_state(state_list):
         .. code-block:: python
 
             >>> import paddle
+            >>> paddle.seed(2023)
             >>> sts = paddle.get_cuda_rng_state()
             >>> paddle.set_cuda_rng_state(sts)
 
diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py
index b13f2696693a4..f4eb29be6ac3f 100644
--- a/python/paddle/nn/initializer/constant.py
+++ b/python/paddle/nn/initializer/constant.py
@@ -98,7 +98,7 @@ class Constant(ConstantInitializer):
             ...                     weight_attr=nn.initializer.Constant(value=2.0))
             >>> res = linear(data)
             >>> print(linear.weight)
-            Tensor(shape=[2, 4], dtype=float32, place=Place(gpu:0), stop_gradient=False,
+            Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
             [[2., 2., 2., 2.],
              [2., 2., 2., 2.]])
 

From 56f212dc791d70118c70659ae486e534a8cac3d4 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 16:38:11 +0800
Subject: [PATCH 09/11] fix error, test=docs_preview

---
 python/paddle/autograd/py_layer.py       | 3 +++
 python/paddle/framework/io.py            | 1 -
 python/paddle/nn/initializer/Bilinear.py | 4 ++--
 python/paddle/nn/initializer/constant.py | 1 +
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 0689aa174fc0b..95a0a42aafdd2 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -316,6 +316,9 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
             >>> z.mean().backward()
 
             >>> print(data.grad)
+            Tensor(shape=[2, 3], dtype=float64, place=Place(cpu), stop_gradient=True,
+            [[0.05858341, 0.16604150, 0.15677770],
+             [0.14051214, 0.02991660, 0.01564609]])
     """
 
     @staticmethod
diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py
index c3a07bc1660a3..cc9ed4768ced9 100644
--- a/python/paddle/framework/io.py
+++ b/python/paddle/framework/io.py
@@ -1028,7 +1028,6 @@ def load(path, **configs):
             >>> path = "example/main_program.pdmodel"
             >>> paddle.save(main_program, path)
             >>> load_main = paddle.load(path)
-            >>> print(load_main)
 
         .. code-block:: python
             :name: code-example-5
diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py
index 90f9617221e61..09177286a5208 100644
--- a/python/paddle/nn/initializer/Bilinear.py
+++ b/python/paddle/nn/initializer/Bilinear.py
@@ -47,8 +47,8 @@ class Bilinear(Initializer):
             >>> B = 8
             >>> H = W = 32
             >>> w_attr = paddle.ParamAttr(learning_rate=0.,
-                                      regularizer=L2Decay(0.),
-                                      initializer=nn.initializer.Bilinear())
+            ...                           regularizer=L2Decay(0.),
+            ...                           initializer=nn.initializer.Bilinear())
             >>> paddle.seed(2023)
             >>> data = paddle.rand([B, 3, H, W], dtype='float32')
             >>> conv_up = nn.Conv2DTranspose(3,
diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py
index f4eb29be6ac3f..355bac8b784e3 100644
--- a/python/paddle/nn/initializer/constant.py
+++ b/python/paddle/nn/initializer/constant.py
@@ -98,6 +98,7 @@ class Constant(ConstantInitializer):
             ...                     weight_attr=nn.initializer.Constant(value=2.0))
             >>> res = linear(data)
             >>> print(linear.weight)
+            Parameter containing:
             Tensor(shape=[2, 4], dtype=float32, place=Place(cpu), stop_gradient=False,
             [[2., 2., 2., 2.],
              [2., 2., 2., 2.]])

From 9b81b3e8435441d600afdb293f448bdf9c82de41 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Tue, 1 Aug 2023 20:10:23 +0800
Subject: [PATCH 10/11] fix seed, test=docs_preview

---
 python/paddle/autograd/py_layer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/paddle/autograd/py_layer.py b/python/paddle/autograd/py_layer.py
index 95a0a42aafdd2..fa9243804faf0 100644
--- a/python/paddle/autograd/py_layer.py
+++ b/python/paddle/autograd/py_layer.py
@@ -317,8 +317,8 @@ class PyLayer(with_mateclass(PyLayerMeta, core.eager.PyLayer, PyLayerContext)):
 
             >>> print(data.grad)
             Tensor(shape=[2, 3], dtype=float64, place=Place(cpu), stop_gradient=True,
-            [[0.05858341, 0.16604150, 0.15677770],
-             [0.14051214, 0.02991660, 0.01564609]])
+            [[0.16604150, 0.05858341, 0.14051214],
+             [0.15677770, 0.01564609, 0.02991660]])
     """
 
     @staticmethod

From f07d397b8e6683ba719f8044361f6431b06e7e69 Mon Sep 17 00:00:00 2001
From: ooooo-create <106524776+ooooo-create@users.noreply.github.com>
Date: Wed, 2 Aug 2023 08:14:14 +0800
Subject: [PATCH 11/11] fix seed, test=docs_preview

---
 python/paddle/framework/random.py        | 4 ----
 python/paddle/nn/initializer/Bilinear.py | 1 -
 2 files changed, 5 deletions(-)

diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py
index 87862ac147992..9670e79b457bd 100644
--- a/python/paddle/framework/random.py
+++ b/python/paddle/framework/random.py
@@ -76,7 +76,6 @@ def get_rng_state(device=None):
     Examples:
         .. code-block:: python
             >>> import paddle
-            >>> paddle.seed(2023)
             >>> sts = paddle.get_rng_state()
     """
     state_list = []
@@ -131,7 +130,6 @@ def get_cuda_rng_state():
         .. code-block:: python
 
             >>> import paddle
-            >>> paddle.seed(2023)
             >>> sts = paddle.get_cuda_rng_state()
 
     """
@@ -161,7 +159,6 @@ def set_rng_state(state_list, device=None):
         .. code-block:: python
 
             >>> import paddle
-            >>> paddle.seed(2023)
             >>> sts = paddle.get_rng_state()
             >>> paddle.set_rng_state(sts)
 
@@ -227,7 +224,6 @@ def set_cuda_rng_state(state_list):
         .. code-block:: python
 
             >>> import paddle
-            >>> paddle.seed(2023)
             >>> sts = paddle.get_cuda_rng_state()
             >>> paddle.set_cuda_rng_state(sts)
 
diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py
index 09177286a5208..403f8773f15b1 100644
--- a/python/paddle/nn/initializer/Bilinear.py
+++ b/python/paddle/nn/initializer/Bilinear.py
@@ -49,7 +49,6 @@ class Bilinear(Initializer):
             >>> w_attr = paddle.ParamAttr(learning_rate=0.,
             ...                           regularizer=L2Decay(0.),
             ...                           initializer=nn.initializer.Bilinear())
-            >>> paddle.seed(2023)
             >>> data = paddle.rand([B, 3, H, W], dtype='float32')
             >>> conv_up = nn.Conv2DTranspose(3,
             ...                              out_channels=C,