From 9e6f4433858a99a687e4cd90d050eb04af625768 Mon Sep 17 00:00:00 2001
From: Candy2Tang <141831089+Candy2Tang@users.noreply.github.com>
Date: Fri, 11 Aug 2023 14:40:26 +0800
Subject: [PATCH] [xdoctest][task 105]reformat example code with google style
 in python/paddle/optimizer/adamax.py (#56167)

* [xdoctest][task 105]reformat example code with google style in python/paddle/optimizer/adamax.py

* fix word typo test=docs_preview

* fix comment test=docs_preview
---
 python/paddle/optimizer/adamax.py | 96 ++++++++++++++++---------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py
index bc33c392a4cba..26988e9d3c96a 100644
--- a/python/paddle/optimizer/adamax.py
+++ b/python/paddle/optimizer/adamax.py
@@ -62,18 +62,18 @@ class Adamax(Optimizer):
         parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``.
             This parameter is required in dygraph mode. And you can specify different options for
             different parameter groups such as the learning rate, weight decay, etc,
-            then the parameters are list of dict. Note that the learning_rate in paramter groups
+            then the parameters are list of dict. Note that the learning_rate in parameter groups
             represents the scale of base learning_rate.
             The default value is None in static graph mode, at this time all parameters will be updated.
         weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
-            It canbe a float value as coeff of L2 regularization or
+            It can be a float value as coeff of L2 regularization or
             :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
             If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already,
             the regularization setting here in optimizer will be ignored for this parameter.
             Otherwise, the regularization setting here in optimizer will take effect.
             Default None, meaning there is no regularization.
-        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of
-            some derived class of ``GradientClipBase`` . There are three cliping strategies
+        grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
+            some derived class of ``GradientClipBase`` . There are three clipping strategies
             ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
             :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
         name (str, optional): Normally there is no need for user to set this property.
@@ -86,49 +86,51 @@ class Adamax(Optimizer):
     Examples:
         .. code-block:: python
 
-            import paddle
-
-            inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
-            linear = paddle.nn.Linear(10, 10)
-            inp = paddle.to_tensor(inp)
-            out = linear(inp)
-            loss = paddle.mean(out)
-
-            beta1 = paddle.to_tensor([0.9], dtype="float32")
-            beta2 = paddle.to_tensor([0.99], dtype="float32")
-
-            adam = paddle.optimizer.Adamax(learning_rate=0.1,
-                    parameters=linear.parameters(),
-                    beta1=beta1,
-                    beta2=beta2,
-                    weight_decay=0.01)
-            out.backward()
-            adam.step()
-            adam.clear_grad()
-
-
-            #Note that the learning_rate of linear_2 is 0.01.
-            linear_1 = paddle.nn.Linear(10, 10)
-            linear_2 = paddle.nn.Linear(10, 10)
-            inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
-            out = linear_1(inp)
-            out = linear_2(out)
-            loss = paddle.mean(out)
-            adam = paddle.optimizer.Adamax(
-                learning_rate=0.1,
-                parameters=[{
-                    'params': linear_1.parameters()
-                }, {
-                    'params': linear_2.parameters(),
-                    'weight_decay': 0.001,
-                    'learning_rate': 0.1,
-                    'beta1': 0.8
-                }],
-                weight_decay=0.01,
-                beta1=0.9)
-            out.backward()
-            adam.step()
-            adam.clear_grad()
+            >>> import paddle
+
+            >>> inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
+            >>> linear = paddle.nn.Linear(10, 10)
+            >>> inp = paddle.to_tensor(inp)
+            >>> out = linear(inp)
+            >>> loss = paddle.mean(out)
+
+            >>> beta1 = paddle.to_tensor([0.9], dtype="float32")
+            >>> beta2 = paddle.to_tensor([0.99], dtype="float32")
+
+            >>> adam = paddle.optimizer.Adamax(learning_rate=0.1,
+            ...                                parameters=linear.parameters(),
+            ...                                beta1=beta1,
+            ...                                beta2=beta2,
+            ...                                weight_decay=0.01
+            ...                                )
+            >>> out.backward()
+            >>> adam.step()
+            >>> adam.clear_grad()
+
+
+            >>> # Note that the learning_rate of linear_2 is 0.01.
+            >>> linear_1 = paddle.nn.Linear(10, 10)
+            >>> linear_2 = paddle.nn.Linear(10, 10)
+            >>> inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
+            >>> out = linear_1(inp)
+            >>> out = linear_2(out)
+            >>> loss = paddle.mean(out)
+            >>> adam = paddle.optimizer.Adamax(
+            ...     learning_rate=0.1,
+            ...     parameters=[{
+            ...         'params': linear_1.parameters()
+            ...     }, {
+            ...         'params': linear_2.parameters(),
+            ...         'weight_decay': 0.001,
+            ...         'learning_rate': 0.1,
+            ...         'beta1': 0.8
+            ...     }],
+            ...     weight_decay=0.01,
+            ...     beta1=0.9
+            ...     )
+            >>> out.backward()
+            >>> adam.step()
+            >>> adam.clear_grad()
     """
     _moment_acc_str = "moment"
     _inf_norm_acc_str = "inf_norm"
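The example's comment "Note that the learning_rate of linear_2 is 0.01." relies on the behaviour documented earlier in the docstring: the learning_rate entry inside a parameter group is a scale on the optimizer's base learning_rate, so linear_2 is updated at 0.1 * 0.1 = 0.01. A minimal standalone sketch of that usage, using only the public paddle.optimizer.Adamax API already shown in the examples above (the variable names here are illustrative, not part of the patched file):

    # Minimal sketch: the 'learning_rate' key in a parameter group scales the
    # base learning_rate, so linear_2 trains at an effective rate of
    # 0.1 * 0.1 = 0.01 while linear_1 uses the base rate of 0.1.
    import paddle

    linear_1 = paddle.nn.Linear(10, 10)
    linear_2 = paddle.nn.Linear(10, 10)

    adamax = paddle.optimizer.Adamax(
        learning_rate=0.1,                      # base learning rate
        parameters=[
            {'params': linear_1.parameters()},  # uses the base rate, 0.1
            {'params': linear_2.parameters(),
             'learning_rate': 0.1},             # scale factor -> 0.1 * 0.1 = 0.01
        ],
    )

    out = linear_2(linear_1(paddle.uniform([10, 10], min=-0.1, max=0.1)))
    loss = paddle.mean(out)
    loss.backward()
    adamax.step()
    adamax.clear_grad()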