diff --git a/apex/normalization/fused_layer_norm.py b/apex/normalization/fused_layer_norm.py index 8558f7a5e..d873969f4 100644 --- a/apex/normalization/fused_layer_norm.py +++ b/apex/normalization/fused_layer_norm.py @@ -310,6 +310,7 @@ class FusedRMSNorm(torch.nn.Module): :attr:`normalized_shape`. :math:`\gamma` is a learnable affine transform parameter of :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``. + ``epsilon`` is added to the mean of the squared elements, and the square root of that sum is then taken. .. note:: Unlike Batch Normalization and Instance Normalization, which applies