PaddlePaddle · zyfncg · Jul 29, 2022 · Jul 18, 2022 · Jul 19, 2022 · Jul 22, 2022
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
@@ -2029,6 +2029,16 @@
     use_gpudnn : true
   backward : softmax_grad
 
+- api : softplus  # forward entry; its gradient entry is the one named by `backward` below
+  args : (Tensor x, float beta, float threshold)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]  # only x is handed to the infer-meta function; beta/threshold are not
+  kernel :
+    func : softplus
+  backward : softplus_grad
+
 - api : split
   args : (Tensor x, IntArray num_or_sections, Scalar(int) axis)
   output : Tensor[]

diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -1947,6 +1947,18 @@
     func : softmax_grad
     use_gpudnn : true
 
+# softplus_grad: backward entry paired with the `softplus` forward api
+- backward_api : softplus_grad
+  forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, float beta, float threshold)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]  # x_grad's meta is inferred from x alone
+  kernel :
+    func : softplus_grad
+  inplace : (out_grad -> x_grad)  # NOTE(review): presumably lets x_grad reuse out_grad's buffer - confirm
+
 - backward_api : split_grad
   forward : split (Tensor x, IntArray num_or_sections, Scalar axis) -> Tensor[](out)
   args : (Tensor[] out_grad, Scalar axis = -1)

diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
@@ -31,7 +31,6 @@
     'silu',
     'logsigmoid',
     'tanh_shrink',
-    'softplus',
     'softsign',
     'tanh',
 ]
@@ -71,7 +70,15 @@
     'reciprocal_',
 ]
 
-__all__ = []
+__all__ = [  # NOTE(review): the set(__all__) loop just below now calls generate_layer_fn on each name
+    'softplus',
+    'softshrink',
+    'hard_shrink',
+    'cumsum',
+    'thresholded_relu',
+    'gelu',
+    'erf',
+]
 
 for _OP in set(__all__):
     globals()[_OP] = generate_layer_fn(_OP)
@@ -494,8 +501,40 @@
 
 """)
 
-add_sample_code(
-    globals()["softplus"], r"""
+_softplus_ = generate_layer_fn('softplus')
+
+
+def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
+    # The docstring is attached right after this def via ``softplus.__doc__``.
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')
+    # Forward only the arguments that are not None. The mapping is spelled
+    # out (no locals()) and the loop names do not shadow the ``name`` param.
+    supplied = {'x': x, 'beta': beta, 'threshold': threshold, 'name': name}
+    kwargs = {key: val for key, val in supplied.items() if val is not None}
+    return _softplus_(**kwargs)
+
+
+softplus.__doc__ = r"""
+    :alias_main: paddle.nn.functional.softplus
+    :alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
+    :old_api: paddle.fluid.layers.softplus
+
+:strong:`Softplus Activation Operator`
+
+Equation:
+    .. math::
+        out = \frac{1}{beta} * \log(1 + e^{beta * x}) \\
+        \text{For numerical stability, the implementation reverts to the linear function when: } beta * x > threshold.
+
+Args:
+    x(Tensor): Input of Softplus op, Tensor, dtype: float32 or float64
+    beta(float, optional): The value of beta for softplus. Default is 1
+    threshold (float, optional): The value of threshold for softplus. Default is 20
+    name(str, optional): Name for the operation (optional, default is None)
+
+Returns:
+    Tensor: The output of Softplus op, dtype: float32 or float64
+
 Examples:
     .. code-block:: python
 
@@ -507,7 +546,7 @@
         print(out)
         # [0.513015, 0.598139, 0.744397, 0.854355]
 
-""")
+"""
 
 add_sample_code(
     globals()["softsign"], r"""
@@ -524,8 +563,6 @@
 
 """)
 
-__all__ += ['softshrink']
-
 _softshrink_ = generate_layer_fn('softshrink')
 
 
@@ -574,8 +611,6 @@ def softshrink(x, alpha=None):
         result = fluid.layers.softshrink(x=data, alpha=0.3)
 """
 
-__all__ += ['hard_shrink']
-
 _hard_shrink_ = generate_layer_fn('hard_shrink')
 
 
@@ -600,8 +635,6 @@ def hard_shrink(x, threshold=None):
     >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
 
-__all__ += ['cumsum']
-
 _cum_sum_ = generate_layer_fn('cumsum')
 
 
@@ -642,8 +675,6 @@ def cumsum(x, axis=None, exclusive=None, reverse=None):
         result = fluid.layers.cumsum(data, axis=0)
 """
 
-__all__ += ['thresholded_relu']
-
 _thresholded_relu_ = generate_layer_fn('thresholded_relu')
 
 
@@ -732,8 +763,6 @@ def thresholded_relu(x, threshold=None):
         #        [-0.        , -0.        ,  1.0013918 ]], dtype=float32)
 """
 
-__all__ += ['gelu']
-
 _gelu_ = generate_layer_fn('gelu')
 
 
@@ -817,8 +846,6 @@ def gelu(x, approximate=False):
         #        [ 0.08796856,  0.20387867,  0.2080159 ]], dtype=float32)
 """
 
-__all__ += ['erf']
-
 _erf_ = generate_layer_fn('erf')
 
 

diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):
 
     def setUp(self):
         self.op_type = "softplus"
+        self.python_api = paddle.nn.functional.softplus
         self.init_dtype()
 
         beta = 2
@@ -2688,10 +2689,14 @@ def setUp(self):
         self.attrs = {'beta': beta, "threshold": threshold}
         self.outputs = {'Out': out}
 
+        self.check_eager = True
+
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out')
+        # getattr with a default keeps check_eager bound when setUp never set it.
+        check_eager = getattr(self, 'check_eager', False)
+        self.check_grad(['X'], 'Out', check_eager=check_eager)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),

diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
@@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
     """
-    if in_dynamic_mode():
+
+    if in_dygraph_mode():
+        return _C_ops.final_state_softplus(x, beta, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],