
Commit

delete fix bug for cpu adamw
zhangbo9674 committed Sep 26, 2021
1 parent 7d282e1 commit c308cdf
Showing 2 changed files with 1 addition and 90 deletions.
paddle/fluid/operators/optimizers/adamw_op.h (1 addition, 2 deletions)
@@ -45,10 +45,9 @@ class AdamWFunctor<T, CPUAdamW> {
         param_, static_cast<Eigen::Index>(numel)};

     T lr = *lr_;

     // Calculation
     param -= lr * lr_ratio_ * coeff_ * param;
-    T* lr_new = const_cast<T*>(lr_);
-    *lr_new *= lr_ratio_;
   }
 };
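For reference, the decoupled weight-decay step performed by AdamWFunctor<T, CPUAdamW> corresponds to the NumPy sketch below. This is only an illustration, not the kernel: the helper name cpu_adamw_decay is hypothetical, while lr_ratio and coeff mirror the operator attributes of the same names used in the tests.

import numpy as np

def cpu_adamw_decay(param, lr, lr_ratio, coeff):
    # Decoupled weight decay: shrink the parameter by lr * lr_ratio * coeff.
    # The learning-rate value itself is left untouched; the in-place
    # const_cast scaling of lr is exactly what this commit removes.
    return param - lr * lr_ratio * coeff * param

# Example with the settings the removed test used (lr_ratio=2.0, coeff=0.5).
param = np.random.uniform(-1, 1, (2, 2)).astype("float32")
param = cpu_adamw_decay(param, lr=0.004, lr_ratio=2.0, coeff=0.5)

In the removed reference implementation further down, the same decay appears as decay = 1.0 - lr * coeff, applied after lr has been multiplied by lr_ratio.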

python/paddle/fluid/tests/unittests/test_adamw_op.py (0 additions, 88 deletions)
@@ -96,94 +96,6 @@ def test_adamw_op_invalid_input(self):
                 0.1, epsilon=-1, parameters=linear.parameters())


-class TestAdamWOp1(OpTest):
-    def setUp(self):
-        '''Test Adam Op with supplied attributes
-        '''
-        self.op_type = "adamw"
-        param = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        grad = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        moment1 = np.random.uniform(-1, 1, (2, 2)).astype("float32")
-        # The second moment is positive
-        moment2 = np.random.random((2, 2)).astype("float32")
-
-        learning_rate = 0.004
-        beta1 = 0.78
-        beta2 = 0.836
-        epsilon = 1e-4
-        beta1_pow = beta1**10
-        beta2_pow = beta2**10
-
-        self.inputs = {
-            'Param': param,
-            'Grad': grad,
-            'Moment1': moment1,
-            'Moment2': moment2,
-            'LearningRate': np.array([learning_rate]).astype("float32"),
-            'Beta1Pow': np.array([beta1_pow]).astype("float32"),
-            'Beta2Pow': np.array([beta2_pow]).astype("float32")
-        }
-
-        self.attrs = {
-            'epsilon': epsilon,
-            'beta1': beta1,
-            'beta2': beta2,
-            "lr_ratio": 2.0,
-            "coeff": 0.5,
-            "with_decay": True
-        }
-
-        param_out, moment1_out, moment2_out = adamw_step(self.inputs,
-                                                         self.attrs)
-
-        self.outputs = {
-            'Moment1Out': moment1_out,
-            'Moment2Out': moment2_out,
-            'ParamOut': param_out,
-            'Beta1PowOut': np.array([beta1_pow]).astype("float32") * beta1,
-            'Beta2PowOut': np.array([beta2_pow]).astype("float32") * beta2
-        }
-
-    def test_check_output(self):
-        paddle.enable_static()
-        self.check_output()
-
-
-def adamw_step(inputs, attributes):
-    param = inputs['Param']
-    grad = inputs['Grad']
-    moment1 = inputs['Moment1']
-    moment2 = inputs['Moment2']
-    lr = inputs['LearningRate']
-    beta1_pow = inputs['Beta1Pow']
-    beta2_pow = inputs['Beta2Pow']
-
-    lr_ratio = attributes['lr_ratio']
-    lr = lr * lr_ratio
-    epsilon = attributes['epsilon']
-
-    if attributes["with_decay"]:
-        coeff = attributes["coeff"]
-        decay = 1.0 - lr * coeff
-        param2 = param * decay
-        param = param2.copy()
-    if 'beta1' in attributes:
-        beta1 = attributes['beta1']
-    else:
-        beta1 = inputs['Beta1Tensor'][0]
-    if 'beta2' in attributes:
-        beta2 = attributes['beta2']
-    else:
-        beta2 = inputs['Beta2Tensor'][0]
-
-    moment1_out = beta1 * moment1 + (1 - beta1) * grad
-    moment2_out = beta2 * moment2 + (1 - beta2) * np.square(grad)
-    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
-    param_out = param - lr_t * (moment1_out / (np.sqrt(moment2_out) + epsilon))
-
-    return param_out, moment1_out, moment2_out
-
-
 class TestAdamWOpGroup(TestAdamWOp):
     def test_adamw_op_dygraph(self):
         paddle.disable_static()
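For context, the removed adamw_step reference helper can be exercised on its own roughly as follows. This is a sketch that assumes adamw_step (as listed in the deleted code above) and numpy are in scope, and it reuses the same settings as the deleted TestAdamWOp1; it is not part of the test suite.

import numpy as np

beta1, beta2 = 0.78, 0.836
inputs = {
    'Param': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Grad': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Moment1': np.random.uniform(-1, 1, (2, 2)).astype("float32"),
    'Moment2': np.random.random((2, 2)).astype("float32"),
    'LearningRate': np.array([0.004]).astype("float32"),
    'Beta1Pow': np.array([beta1**10]).astype("float32"),
    'Beta2Pow': np.array([beta2**10]).astype("float32"),
}
attrs = {
    'epsilon': 1e-4,
    'beta1': beta1,
    'beta2': beta2,
    'lr_ratio': 2.0,
    'coeff': 0.5,
    'with_decay': True,
}

# adamw_step first applies the decoupled decay (with lr scaled by lr_ratio),
# then the bias-corrected Adam update, and returns the updated parameter
# together with both moment estimates.
param_out, moment1_out, moment2_out = adamw_step(inputs, attrs)
assert param_out.shape == (2, 2)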
