
Commit

delete fix bug for cpu adamw
zhangbo9674 committed Sep 26, 2021
1 parent 7d282e1 commit c308cdf
Showing 2 changed files with 1 addition and 90 deletions.
paddle/fluid/operators/optimizers/adamw_op.h (1 addition, 2 deletions)
@@ -45,10 +45,9 @@ class AdamWFunctor<T, CPUAdamW> {
         param_, static_cast<Eigen::Index>(numel)};

     T lr = *lr_;

     // Calculation
     param -= lr * lr_ratio_ * coeff_ * param;
-    T* lr_new = const_cast<T*>(lr_);
-    *lr_new *= lr_ratio_;
   }
 };
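For reference, the decoupled weight-decay step performed by AdamWFunctor<T, CPUAdamW> corresponds to the NumPy sketch below. This is only an illustration, not the kernel: the helper name cpu_adamw_decay is hypothetical, while lr_ratio and coeff mirror the operator attributes of the same names used in the tests.

import numpy as np

def cpu_adamw_decay(param, lr, lr_ratio, coeff):
    # Decoupled weight decay: shrink the parameter by lr * lr_ratio * coeff.
    # The learning-rate value itself is left untouched; the in-place
    # const_cast scaling of lr is exactly what this commit removes.
    return param - lr * lr_ratio * coeff * param

# Example with the settings the removed test used (lr_ratio=2.0, coeff=0.5).
param = np.random.uniform(-1, 1, (2, 2)).astype("float32")
param = cpu_adamw_decay(param, lr=0.004, lr_ratio=2.0, coeff=0.5)

In the removed reference implementation further down, the same decay appears as decay = 1.0 - lr * coeff, applied after lr has been multiplied by lr_ratio.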

python/paddle/fluid/tests/unittests/test_adamw_op.py (0 additions, 88 deletions)
@@ -96,94 +96,6 @@ def test_adamw_op_invalid_input(self):
                 0.1, epsilon=-1, parameters=linear.parameters())


-class TestAdamWOp1(OpTest):
-    def setUp(self):
-        '''Test Adam Op with supplied attributes
-        '''
-        self.op_type = "adamw"
-        param = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        grad = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        moment1 = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        # The second moment is positive
-        moment2 = np.random.random((2, 2)).astype("float32")
-
-        learning_rate = 0.004
-        beta1 = 0.78
-        beta2 = 0.836
-        epsilon = 1e-4
-        beta1_pow = beta1**10
-        beta2_pow = beta2**10
-
-        self.inputs = {
-            'Param': param,
-            'Grad': grad,
-            'Moment1': moment1,
-            'Moment2': moment2,
-            'LearningRate': np.array([learning_rate]).astype("float32"),
-            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
-            'Beta2Pow': np.array([beta2_pow]).astype("float32")
-        }
-
-        self.attrs = {
-            'epsilon': epsilon,
-            'beta1': beta1,
-            'beta2': beta2,
-            "lr_ratio": 2.0,
-            "coeff": 0.5,
-            "with_decay": True
-        }
-
-        param_out, moment1_out, moment2_out = adamw_step(self.inputs,
-                                                         self.attrs)
-
-        self.outputs = {
-            'Moment1Out': moment1_out,
-            'Moment2Out': moment2_out,
-            'ParamOut': param_out,
-            'Beta1PowOut': np.array([beta1_pow]).astype("float32") * beta1,
-            'Beta2PowOut': np.array([beta2_pow]).astype("float32") * beta2
-        }
-
-    def test_check_output(self):
-        paddle.enable_static()
-        self.check_output()
-
-
-def adamw_step(inputs, attributes):
-    param = inputs['Param']
-    grad = inputs['Grad']
-    moment1 = inputs['Moment1']
-    moment2 = inputs['Moment2']
-    lr = inputs['LearningRate']
-    beta1_pow = inputs['Beta1Pow']
-    beta2_pow = inputs['Beta2Pow']
-
-    lr_ratio = attributes['lr_ratio']
-    lr = lr * lr_ratio
-    epsilon = attributes['epsilon']
-
-    if attributes["with_decay"]:
-        coeff = attributes["coeff"]
-        decay = 1.0 - lr * coeff
-        param2 = param * decay
-        param = param2.copy()
-    if 'beta1' in attributes:
-        beta1 = attributes['beta1']
-    else:
-        beta1 = inputs['Beta1Tensor'][0]
-    if 'beta2' in attributes:
-        beta2 = attributes['beta2']
-    else:
-        beta2 = inputs['Beta2Tensor'][0]
-
-    moment1_out = beta1 * moment1 + (1 - beta1) * grad
-    moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad)
-    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
-    param_out = param - lr_t * (moment1_out / (np.sqrt(moment2_out) + epsilon))
-
-    return param_out, moment1_out, moment2_out
-
-
 class TestAdamWOpGroup(TestAdamWOp):
     def test_adamw_op_dygraph(self):
         paddle.disable_static()
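For context, the removed adamw_step reference helper can be exercised on its own roughly as follows. This is a sketch that assumes adamw_step (as listed in the deleted code above) and numpy are in scope, and it reuses the same settings as the deleted TestAdamWOp1; it is not part of the test suite.

import numpy as np

beta1, beta2 = 0.78, 0.836
inputs = {
    'Param': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Grad': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Moment1': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Moment2': np.random.random((2, 2)).astype("float32"),
    'LearningRate': np.array([0.004]).astype("float32"),
    'Beta1Pow': np.array([beta1**10]).astype("float32"),
    'Beta2Pow': np.array([beta2**10]).astype("float32"),
}
attrs = {
    'epsilon': 1e-4,
    'beta1': beta1,
    'beta2': beta2,
    'lr_ratio': 2.0,
    'coeff': 0.5,
    'with_decay': True,
}

# adamw_step first applies the decoupled decay (with lr scaled by lr_ratio),
# then the bias-corrected Adam update, and returns the updated parameter
# together with both moment estimates.
param_out, moment1_out, moment2_out = adamw_step(inputs, attrs)
assert param_out.shape == (2, 2)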
