
Commit

[xdoctest][task 105]reformat example code with google style in python/paddle/optimizer/adamax.py (#56167)

* [xdoctest][task 105]reformat example code with google style in python/paddle/optimizer/adamax.py

* fix word typo test=docs_preview

* fix comment test=docs_preview
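For context, the target format referenced in the commit message is a Google-style docstring whose example lines carry `>>>`/`...` prompts, so a doctest runner such as xdoctest can execute them. The sketch below is purely illustrative (the `scale` function is hypothetical and not part of the patch):

def scale(x, factor=2.0):
    """Multiply ``x`` by ``factor``.

    Args:
        x (float): Value to scale.
        factor (float, optional): Scaling coefficient. Default 2.0.

    Returns:
        float, the scaled value.

    Examples:
        .. code-block:: python

            >>> # Statements use `>>>`/`...` prompts and show expected output,
            >>> # so tools like xdoctest can run and verify the example.
            >>> scale(3.0)
            6.0
    """
    return x * factor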
Candy2Tang authored Aug 11, 2023
1 parent 81874b9 commit 9e6f443
Showing 1 changed file with 49 additions and 47 deletions.
96 changes: 49 additions & 47 deletions python/paddle/optimizer/adamax.py
@@ -62,18 +62,18 @@ class Adamax(Optimizer):
parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
This parameter is required in dygraph mode. And you can specify different options for
different parameter groups such as the learning rate, weight decay, etc,
- then the parameters are list of dict. Note that the learning_rate in paramter groups
+ then the parameters are list of dict. Note that the learning_rate in parameter groups
represents the scale of base learning_rate.
The default value is None in static graph mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
- It canbe a float value as coeff of L2 regularization or
+ It can be a float value as coeff of L2 regularization or
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already,
the regularization setting here in optimizer will be ignored for this parameter.
Otherwise, the regularization setting here in optimizer will take effect.
Default None, meaning there is no regularization.
- grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of
- some derived class of ``GradientClipBase`` . There are three cliping strategies
+ grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
+ some derived class of ``GradientClipBase`` . There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): Normally there is no need for user to set this property.
@@ -86,49 +86,51 @@ class Adamax(Optimizer):
Examples:
.. code-block:: python
- import paddle
- inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
- linear = paddle.nn.Linear(10, 10)
- inp = paddle.to_tensor(inp)
- out = linear(inp)
- loss = paddle.mean(out)
- beta1 = paddle.to_tensor([0.9], dtype="float32")
- beta2 = paddle.to_tensor([0.99], dtype="float32")
- adam = paddle.optimizer.Adamax(learning_rate=0.1,
- parameters=linear.parameters(),
- beta1=beta1,
- beta2=beta2,
- weight_decay=0.01)
- out.backward()
- adam.step()
- adam.clear_grad()
- #Note that the learning_rate of linear_2 is 0.01.
- linear_1 = paddle.nn.Linear(10, 10)
- linear_2 = paddle.nn.Linear(10, 10)
- inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
- out = linear_1(inp)
- out = linear_2(out)
- loss = paddle.mean(out)
- adam = paddle.optimizer.Adamax(
- learning_rate=0.1,
- parameters=[{
- 'params': linear_1.parameters()
- }, {
- 'params': linear_2.parameters(),
- 'weight_decay': 0.001,
- 'learning_rate': 0.1,
- 'beta1': 0.8
- }],
- weight_decay=0.01,
- beta1=0.9)
- out.backward()
- adam.step()
- adam.clear_grad()
+ >>> import paddle
+ >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
+ >>> linear = paddle.nn.Linear(10, 10)
+ >>> inp = paddle.to_tensor(inp)
+ >>> out = linear(inp)
+ >>> loss = paddle.mean(out)
+ >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
+ >>> beta2 = paddle.to_tensor([0.99], dtype="float32")
+ >>> adam = paddle.optimizer.Adamax(learning_rate=0.1,
+ ...                                parameters=linear.parameters(),
+ ...                                beta1=beta1,
+ ...                                beta2=beta2,
+ ...                                weight_decay=0.01
+ ...                                )
+ >>> out.backward()
+ >>> adam.step()
+ >>> adam.clear_grad()
+ >>> # Note that the learning_rate of linear_2 is 0.01.
+ >>> linear_1 = paddle.nn.Linear(10, 10)
+ >>> linear_2 = paddle.nn.Linear(10, 10)
+ >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
+ >>> out = linear_1(inp)
+ >>> out = linear_2(out)
+ >>> loss = paddle.mean(out)
+ >>> adam = paddle.optimizer.Adamax(
+ ...     learning_rate=0.1,
+ ...     parameters=[{
+ ...         'params': linear_1.parameters()
+ ...     }, {
+ ...         'params': linear_2.parameters(),
+ ...         'weight_decay': 0.001,
+ ...         'learning_rate': 0.1,
+ ...         'beta1': 0.8
+ ...     }],
+ ...     weight_decay=0.01,
+ ...     beta1=0.9
+ ... )
+ >>> out.backward()
+ >>> adam.step()
+ >>> adam.clear_grad()
"""
_moment_acc_str = "moment"
_inf_norm_acc_str = "inf_norm"
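The patched docstring also documents a ``grad_clip`` argument that neither example exercises. Below is a minimal sketch in the same ``>>>`` style, assuming the public ``paddle.nn.ClipGradByGlobalNorm`` API, of how gradient clipping might be combined with ``Adamax``; it is an illustration, not part of this commit:

>>> import paddle
>>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
>>> linear = paddle.nn.Linear(10, 10)
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> # Clip the global gradient norm to 1.0 before each parameter update.
>>> clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
>>> adamax = paddle.optimizer.Adamax(learning_rate=0.1,
...                                  parameters=linear.parameters(),
...                                  grad_clip=clip)
>>> loss.backward()
>>> adamax.step()
>>> adamax.clear_grad()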
