diff --git a/_typos.toml b/_typos.toml
index e40d20c3ca14b6..44b00526270719 100644
--- a/_typos.toml
+++ b/_typos.toml
@@ -346,8 +346,6 @@ neigbhors = 'neigbhors'
 Neigbors = 'Neigbors'
 neighor = 'neighor'
 netwrok = 'netwrok'
-normlized = 'normlized'
-Normlized = 'Normlized'
 normlize = 'normlize'
 noraml = 'noraml'
 numer = 'numer'
diff --git a/paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h b/paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h
index b188c517c20a42..da7fde9e25a65d 100644
--- a/paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h
+++ b/paddle/fluid/primitive/decomp_rule/decomp_vjp/details.h
@@ -834,16 +834,16 @@ void layer_norm_grad(const Tensor& x,
   auto bias_ptr = bias.get_ptr();
   LayerNormDecompHelper decomp_help(x, scale, bias, begin_norm_axis);
 
-  std::vector<int64_t> normlized_axis;
+  std::vector<int64_t> normalized_axis;
   std::vector<int64_t> mean_var_new_shape(mean.dims().size(), 0);
   for (int i = begin_norm_axis; i < x_dims.size(); ++i) {
     mean_var_new_shape.push_back(1);
-    normlized_axis.push_back(i);
+    normalized_axis.push_back(i);
   }
 
-  std::vector<int64_t> un_normlized_axis;
+  std::vector<int64_t> un_normalized_axis;
   for (int i = 0; i < begin_norm_axis; ++i) {
-    un_normlized_axis.push_back(i);
+    un_normalized_axis.push_back(i);
   }
 
   auto mean_ = reshape(mean, mean_var_new_shape);
@@ -875,14 +875,14 @@ void layer_norm_grad(const Tensor& x,
   }
 
   auto dx_end = sqrt_var_1 * out_grad_scale;
-  auto d_mean = dx_end.sum(normlized_axis, x_cast.dtype(), true);  // M,1
+  auto d_mean = dx_end.sum(normalized_axis, x_cast.dtype(), true);  // M,1
 
   auto d_std_1 = (tmp * x_sub_mean * out_grad_scale)
-                     .sum(normlized_axis, x_cast.dtype(), true);  // M,1
+                     .sum(normalized_axis, x_cast.dtype(), true);  // M,1
   auto d_std = d_std_1 * x_sub_mean_mul_sqrt_var_1;  // M,1 * M,N = M,N
 
   auto d_mean_d_std =
-      (d_mean + d_std) / decomp_help.GetNormlizedNumel(d_std);
+      (d_mean + d_std) / decomp_help.GetNormalizedNumel(d_std);
   auto x_grad_tmp = dx_end - d_mean_d_std;
   x_grad_tmp = ConverToOrig(x_grad_tmp, x.dtype());
 
@@ -893,7 +893,7 @@
   if (scale_grad) {
     if (scale_ptr) {
       auto scale_grad_tmp = (x_sub_mean_mul_sqrt_var_1 * out_grad_cast)
-                                .sum(un_normlized_axis, x_cast.dtype(), true);
+                                .sum(un_normalized_axis, x_cast.dtype(), true);
       scale_grad_tmp = reshape(scale_grad_tmp, {-1});
       scale_grad_tmp = ConverToOrig(scale_grad_tmp, scale_ptr->dtype());
 
@@ -906,7 +906,7 @@
   if (bias_grad) {
     if (bias_ptr) {
       auto bias_grad_tmp =
-          out_grad_cast.sum(un_normlized_axis, x_cast.dtype(), true);
+          out_grad_cast.sum(un_normalized_axis, x_cast.dtype(), true);
       bias_grad_tmp = reshape(bias_grad_tmp, {-1});
       bias_grad_tmp = ConverToOrig(bias_grad_tmp, bias_ptr->dtype());
 
diff --git a/paddle/fluid/primitive/decomp_utils/decomp_utils.h b/paddle/fluid/primitive/decomp_utils/decomp_utils.h
index 0509b2699f40cc..de89fca34db7cf 100644
--- a/paddle/fluid/primitive/decomp_utils/decomp_utils.h
+++ b/paddle/fluid/primitive/decomp_utils/decomp_utils.h
@@ -322,22 +322,22 @@ class LayerNormDecompHelper {
     for (int i = begin_norm_axis; i < x_rank_; ++i) {
       if (x_dims[i] < 0) {
         static_norm_shape_ = false;
-        normlized_numel_ = -1;
+        normalized_numel_ = -1;
         break;
       }
 
-      normlized_shape_.push_back(x_dims[i]);
+      normalized_shape_.push_back(x_dims[i]);
 
-      normlized_numel_ *= x_dims[i];
+      normalized_numel_ *= x_dims[i];
     }
 
     if (!static_norm_shape_) {
       // try get static norm numel from sacle for bias
-      normlized_numel_ = -1;
+      normalized_numel_ = -1;
       if (scale.get_ptr()) {
-        normlized_numel_ = scale->dims()[0];
+        normalized_numel_ = scale->dims()[0];
       } else if (bias.get_ptr()) {
-        normlized_numel_ = bias->dims()[0];
+        normalized_numel_ = bias->dims()[0];
       }
     }
   }
@@ -349,7 +349,7 @@ class LayerNormDecompHelper {
     }
 
     if (static_norm_shape_) {
-      return reshape(s, normlized_shape_);
+      return reshape(s, normalized_shape_);
     } else {
       return backend::reshape(
           s, get_slice_vec(shape64(x), begin_norm_axis_, x_rank_));
@@ -357,9 +357,9 @@ class LayerNormDecompHelper {
   }
 
   template <typename T>
-  Tensor GetNormlizedNumel(const Tensor& x) {
-    if (normlized_numel_ != -1) {
-      return full_scalar(normlized_numel_, x.dtype());
+  Tensor GetNormalizedNumel(const Tensor& x) {
+    if (normalized_numel_ != -1) {
+      return full_scalar(normalized_numel_, x.dtype());
     } else {
       auto x_shape = shape64(x);
       auto numel = get_slice(x_shape, begin_norm_axis_);
@@ -372,11 +372,11 @@ class LayerNormDecompHelper {
   }
 
  private:
-  std::vector<int64_t> normlized_shape_;
+  std::vector<int64_t> normalized_shape_;
   bool scale_need_reshape_;
   bool static_norm_shape_;
   int64_t x_rank_;
-  int64_t normlized_numel_{1};
+  int64_t normalized_numel_{1};
   int begin_norm_axis_;
 };
 