add fp16 kernel for clip_op (#36577)
zhangbo9674 authored Oct 22, 2021
1 parent d490621 commit 1962d3a
Showing 3 changed files with 30 additions and 10 deletions.
8 changes: 6 additions & 2 deletions paddle/fluid/operators/clip_op.cu
@@ -19,10 +19,14 @@ REGISTER_OP_CUDA_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext,
+                    paddle::platform::float16>);
 
 REGISTER_OP_CUDA_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext,
+                        paddle::platform::float16>);
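With these registrations, CUDA clip and clip_grad ops now accept float16 tensors and dispatch to ClipKernel / ClipGradKernel instantiated with paddle::platform::float16 instead of failing with an unsupported-dtype error. A minimal usage sketch from the Python API, assuming a GPU build of Paddle that includes this commit (values are illustrative):

import paddle

paddle.set_device('gpu')
x = paddle.to_tensor([[-1.0, 0.25], [0.9, 2.0]], dtype='float16',
                     stop_gradient=False)
y = paddle.clip(x, min=0.0, max=1.0)  # forward runs the new fp16 clip kernel
y.sum().backward()                    # backward runs the fp16 clip_grad kernel
print(x.grad)                         # 1 where 0 < x < 1, else 0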
6 changes: 3 additions & 3 deletions paddle/fluid/operators/clip_op.h
@@ -54,7 +54,7 @@ class ClipGradFunctor {
  public:
   explicit ClipGradFunctor(const T min, const T max) : min_(min), max_(max) {}
   HOSTDEVICE T operator()(const T& x, const T& y) const {
-    return (y > min_ && y < max_) ? x : 0;
+    return (y > min_ && y < max_) ? x : static_cast<T>(0);
   }
 
  private:
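The bare literal 0 is an int, so with T = paddle::platform::float16 the ternary's branches had no clean common type (the zero would have to go through an int or float promotion, if it compiled at all); casting it to T keeps the whole expression in the kernel's dtype. The functor computes the standard clip gradient: the upstream gradient passes through where the forward input was strictly inside (min, max) and is zeroed elsewhere. A numpy mirror of that rule, for illustration only:

import numpy as np

def clip_grad(dout, x, lo, hi):
    # Mirrors ClipGradFunctor: dout flows through only where the forward
    # input x sat strictly inside (lo, hi); elsewhere a zero of the same
    # dtype is used (the static_cast<T>(0) above).
    return np.where((x > lo) & (x < hi), dout, np.zeros_like(dout))

x = np.float16([0.1, 0.5, 0.9])
print(clip_grad(np.ones_like(x), x, np.float16(0.3), np.float16(0.8)))
# -> [0. 1. 0.] with dtype float16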
@@ -79,7 +79,7 @@ class ClipKernel : public framework::OpKernel<T> {
     }
     max = static_cast<T>(max);
 
-    auto min = context.Attr<float>("min");
+    auto min = static_cast<T>(context.Attr<float>("min"));
     Tensor min_cpu;
     if (context.HasInput("Min")) {
       auto* min_t = context.Input<Tensor>("Min");
@@ -156,7 +156,7 @@ class ClipGradKernel : public framework::OpKernel<T> {
     }
     max = static_cast<T>(max);
 
-    auto min = context.Attr<float>("min");
+    auto min = static_cast<T>(context.Attr<float>("min"));
     Tensor min_cpu;
     if (context.HasInput("Min")) {
       auto* min_t = context.Input<Tensor>("Min");
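Both kernels read min and max as float32 attributes whatever T is, so they are now cast to T before use; for T = float16 the effective clip bounds are the half-precision roundings of the attribute values, which is worth keeping in mind when checking fp16 outputs against a float32 reference. A small numpy sketch of that rounding (values illustrative):

import numpy as np

attr_min = np.float32(0.3)      # the attribute as stored on the op
eff_min = np.float16(attr_min)  # the bound an fp16 kernel compares against
print(attr_min, eff_min)        # 0.3 vs 0.2998046875

x = np.float16([0.299, 0.3, 0.95])
print(np.clip(x, eff_min, np.float16(0.7)))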
26 changes: 21 additions & 5 deletions python/paddle/fluid/tests/unittests/test_clip_op.py
@@ -43,7 +43,7 @@ def setUp(self):
         else:
             max_v = self.attrs['max']
 
-        input = np.random.random(self.shape).astype("float32")
+        input = np.random.random(self.shape).astype(self.dtype)
         input[np.abs(input - min_v) < self.max_relative_error] = 0.5
         input[np.abs(input - max_v) < self.max_relative_error] = 0.5
         self.inputs['X'] = input
@@ -60,50 +60,66 @@ def test_check_grad_normal(self):
         paddle.disable_static()
 
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 10, 10)
         self.max = 0.8
         self.min = 0.3
-        self.inputs['Max'] = np.array([0.8]).astype('float32')
-        self.inputs['Min'] = np.array([0.1]).astype('float32')
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.1]).astype(self.dtype)
 
 
 class TestCase1(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (8, 16, 8)
         self.max = 0.7
         self.min = 0.0
 
 
 class TestCase2(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (8, 16)
         self.max = 1.0
         self.min = 0.0
 
 
 class TestCase3(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 16)
         self.max = 0.7
         self.min = 0.2
 
 
 class TestCase4(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 8)
         self.max = 0.7
         self.min = 0.2
-        self.inputs['Max'] = np.array([0.8]).astype('float32')
-        self.inputs['Min'] = np.array([0.3]).astype('float32')
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
 
 
 class TestCase5(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 16)
         self.max = 0.5
         self.min = 0.5
 
 
+class TestCase6(TestClipOp):
+    def initTestCase(self):
+        self.dtype = np.float16
+        self.shape = (4, 8, 8)
+        self.max = 0.7
+        self.min = 0.2
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
+
+
 class TestClipOpError(unittest.TestCase):
     def test_errors(self):
         paddle.enable_static()
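TestCase6 reuses the OpTest checks with dtype switched to np.float16, so the same forward and gradient verification now covers the new CUDA kernels. An equivalent hand check, sketched assuming a CUDA device is available (tolerances chosen loosely for half precision):

import numpy as np
import paddle

paddle.set_device('gpu')              # the fp16 clip kernels are CUDA-only
x32 = np.random.random((4, 8, 8)).astype('float32')
ref = np.clip(x32, 0.3, 0.8)          # float32 reference result
out = paddle.clip(paddle.to_tensor(x32.astype('float16')), min=0.3, max=0.8)
np.testing.assert_allclose(out.numpy().astype('float32'), ref,
                           rtol=1e-3, atol=1e-3)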
