Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phi softplus migration #44542

Merged
merged 11 commits into from
Jul 29, 2022
10 changes: 10 additions & 0 deletions paddle/phi/api/yaml/legacy_api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2029,6 +2029,16 @@
use_gpudnn : true
backward : softmax_grad

# Forward softplus API: takes the input tensor x plus the float attributes
# beta and threshold. Output meta is inferred from x alone through
# UnchangedInferMeta; the gradient is supplied by softplus_grad.
- api : softplus
args : (Tensor x, float beta, float threshold)
output : Tensor
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus
backward : softplus_grad

- api : split
args : (Tensor x, IntArray num_or_sections, Scalar(int) axis)
output : Tensor[]
Expand Down
12 changes: 12 additions & 0 deletions paddle/phi/api/yaml/legacy_backward.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1947,6 +1947,18 @@
func : softmax_grad
use_gpudnn : true

# softplus: backward entry for the forward `softplus` API. It receives x,
# out_grad and the same beta/threshold attributes as the forward call.
# x_grad's meta is inferred from x through UnchangedInferMeta, and the entry
# declares the inplace mapping out_grad -> x_grad.
- backward_api : softplus_grad
forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float beta, float threshold)
output : Tensor(x_grad)
infer_meta :
func : UnchangedInferMeta
param : [x]
kernel :
func : softplus_grad
inplace : (out_grad -> x_grad)

- backward_api : split_grad
forward : split (Tensor x, IntArray num_or_sections, Scalar axis) -> Tensor[](out)
args : (Tensor[] out_grad, Scalar axis = -1)
Expand Down
61 changes: 44 additions & 17 deletions python/paddle/fluid/layers/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
'silu',
'logsigmoid',
'tanh_shrink',
'softplus',
'softsign',
'tanh',
]
Expand Down Expand Up @@ -71,7 +70,15 @@
'reciprocal_',
]

__all__ = []
# Names exported from this module.
__all__ = [
'softplus',
'softshrink',
'hard_shrink',
'cumsum',
'thresholded_relu',
'gelu',
'erf',
]

# Bind each exported name to the layer function that generate_layer_fn
# builds for the op of the same name. set() removes duplicate names first.
for _OP in set(__all__):
globals()[_OP] = generate_layer_fn(_OP)
Expand Down Expand Up @@ -494,8 +501,40 @@

""")

add_sample_code(
globals()["softplus"], r"""
_softplus_ = generate_layer_fn('softplus')


def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
    # Layer-level wrapper around the generated softplus op function.
    #
    # Args:
    #     x: op input; its dtype is validated as float32 or float64.
    #     beta: forwarded to the op as ``beta``.
    #     threshold: forwarded to the op as ``threshold``.
    #     name: optional operation name; forwarded only when not None.
    #
    # Returns:
    #     Whatever ``_softplus_`` returns for the forwarded arguments.
    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')

    # List the forwarded arguments explicitly instead of harvesting locals():
    # the previous loop reused ``name`` as its loop variable, shadowing the
    # parameter, and would have forwarded any local added later by accident.
    candidates = {
        'x': x,
        'beta': beta,
        'threshold': threshold,
        'name': name,
    }
    # Arguments left as None are omitted so the generated function applies
    # its own defaults for them.
    kwargs = {key: value for key, value in candidates.items() if value is not None}
    return _softplus_(**kwargs)


softplus.__doc__ = r"""
:alias_main: paddle.nn.functional.softplus
:alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
:old_api: paddle.fluid.layers.softplus

:strong:`Softplus Activation Operator`

Equation:
.. math::
out = \\frac{1}{beta} * log(1 + e^{beta * x})
For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.

Args:
x(Tensor): Input of Softplus op, Tensor, dtype: float32 or float64
beta(float, optional): The value of beta for softplus. Default is 1
threshold (float, optional): The value of threshold for softplus. Default is 20
name(str, optional): Name for the operation (optional, default is None)

Returns:
Variable: The output of Softplus op, Tensor, dtype: float32 or float64

Examples:
.. code-block:: python

Expand All @@ -507,7 +546,7 @@
print(out)
# [0.513015, 0.598139, 0.744397, 0.854355]

""")
"""

add_sample_code(
globals()["softsign"], r"""
Expand All @@ -524,8 +563,6 @@

""")

__all__ += ['softshrink']

_softshrink_ = generate_layer_fn('softshrink')


Expand Down Expand Up @@ -574,8 +611,6 @@ def softshrink(x, alpha=None):
result = fluid.layers.softshrink(x=data, alpha=0.3)
"""

__all__ += ['hard_shrink']

_hard_shrink_ = generate_layer_fn('hard_shrink')


Expand All @@ -600,8 +635,6 @@ def hard_shrink(x, threshold=None):
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""

__all__ += ['cumsum']

_cum_sum_ = generate_layer_fn('cumsum')


Expand Down Expand Up @@ -642,8 +675,6 @@ def cumsum(x, axis=None, exclusive=None, reverse=None):
result = fluid.layers.cumsum(data, axis=0)
"""

__all__ += ['thresholded_relu']

_thresholded_relu_ = generate_layer_fn('thresholded_relu')


Expand Down Expand Up @@ -732,8 +763,6 @@ def thresholded_relu(x, threshold=None):
# [-0. , -0. , 1.0013918 ]], dtype=float32)
"""

__all__ += ['gelu']

_gelu_ = generate_layer_fn('gelu')


Expand Down Expand Up @@ -817,8 +846,6 @@ def gelu(x, approximate=False):
# [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
"""

__all__ += ['erf']

_erf_ = generate_layer_fn('erf')


Expand Down
7 changes: 6 additions & 1 deletion python/paddle/fluid/tests/unittests/test_activation_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):

def setUp(self):
self.op_type = "softplus"
self.python_api = paddle.nn.functional.softplus
self.init_dtype()

beta = 2
Expand All @@ -2688,10 +2689,14 @@ def setUp(self):
self.attrs = {'beta': beta, "threshold": threshold}
self.outputs = {'Out': out}

self.check_eager = True

def test_check_grad(self):
    """Check the gradient of ``Out`` with respect to ``X``.

    The check is skipped when the test case runs with float16. The
    ``check_eager`` flag is taken from the instance when ``setUp`` defined
    it; otherwise it falls back to ``False``.
    """
    if self.dtype == np.float16:
        return
    # getattr with a default keeps ``check_eager`` bound even for subclasses
    # whose setUp never sets the attribute (previously an unbound local), and
    # the gradient check now runs exactly once instead of twice.
    check_eager = getattr(self, 'check_eager', False)
    self.check_grad(['X'], 'Out', check_eager=check_eager)


@unittest.skipIf(not core.is_compiled_with_cuda(),
Expand Down
6 changes: 5 additions & 1 deletion python/paddle/nn/functional/activation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
"""
if in_dynamic_mode():

if in_dygraph_mode():
return _C_ops.final_state_softplus(x, beta, threshold)

if _in_legacy_dygraph():
return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)

check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
Expand Down