add fp16 kernel for clip_op (#36577)
zhangbo9674 authored Oct 22, 2021
1 parent d490621 commit 1962d3a
Showing 3 changed files with 30 additions and 10 deletions.
8 changes: 6 additions & 2 deletions paddle/fluid/operators/clip_op.cu
@@ -19,10 +19,14 @@ REGISTER_OP_CUDA_KERNEL(
     clip, ops::ClipKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ClipKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ClipKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    ops::ClipKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ClipKernel<paddle::platform::CUDADeviceContext,
+                    paddle::platform::float16>);
 
 REGISTER_OP_CUDA_KERNEL(
     clip_grad, ops::ClipGradKernel<paddle::platform::CUDADeviceContext, float>,
     ops::ClipGradKernel<paddle::platform::CUDADeviceContext, double>,
     ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ClipGradKernel<paddle::platform::CUDADeviceContext,
+                        paddle::platform::float16>);
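With these registrations, CUDA clip and clip_grad ops now accept float16 tensors and dispatch to ClipKernel / ClipGradKernel instantiated with paddle::platform::float16 instead of failing with an unsupported-dtype error. A minimal usage sketch from the Python API, assuming a GPU build of Paddle that includes this commit (values are illustrative):

import paddle

paddle.set_device('gpu')
x = paddle.to_tensor([[-1.0, 0.25], [0.9, 2.0]], dtype='float16',
                     stop_gradient=False)
y = paddle.clip(x, min=0.0, max=1.0)  # forward runs the new fp16 clip kernel
y.sum().backward()                    # backward runs the fp16 clip_grad kernel
print(x.grad)                         # 1 where 0 < x < 1, else 0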
6 changes: 3 additions & 3 deletions paddle/fluid/operators/clip_op.h
@@ -54,7 +54,7 @@ class ClipGradFunctor {
  public:
   explicit ClipGradFunctor(const T min, const T max) : min_(min), max_(max) {}
   HOSTDEVICE T operator()(const T& x, const T& y) const {
-    return (y > min_ && y < max_) ? x : 0;
+    return (y > min_ && y < max_) ? x : static_cast<T>(0);
   }
 
  private:
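The bare literal 0 is an int, so with T = paddle::platform::float16 the ternary's branches had no clean common type (the zero would have to go through an int or float promotion, if it compiled at all); casting it to T keeps the whole expression in the kernel's dtype. The functor computes the standard clip gradient: the upstream gradient passes through where the forward input was strictly inside (min, max) and is zeroed elsewhere. A numpy mirror of that rule, for illustration only:

import numpy as np

def clip_grad(dout, x, lo, hi):
    # Mirrors ClipGradFunctor: dout flows through only where the forward
    # input x sat strictly inside (lo, hi); elsewhere a zero of the same
    # dtype is used (the static_cast<T>(0) above).
    return np.where((x > lo) & (x < hi), dout, np.zeros_like(dout))

x = np.float16([0.1, 0.5, 0.9])
print(clip_grad(np.ones_like(x), x, np.float16(0.3), np.float16(0.8)))
# -> [0. 1. 0.] with dtype float16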
@@ -79,7 +79,7 @@ class ClipKernel : public framework::OpKernel<T> {
     }
     max = static_cast<T>(max);
 
-    auto min = context.Attr<float>("min");
+    auto min = static_cast<T>(context.Attr<float>("min"));
     Tensor min_cpu;
     if (context.HasInput("Min")) {
       auto* min_t = context.Input<Tensor>("Min");
@@ -156,7 +156,7 @@ class ClipGradKernel : public framework::OpKernel<T> {
     }
     max = static_cast<T>(max);
 
-    auto min = context.Attr<float>("min");
+    auto min = static_cast<T>(context.Attr<float>("min"));
     Tensor min_cpu;
     if (context.HasInput("Min")) {
       auto* min_t = context.Input<Tensor>("Min");
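Both kernels read min and max as float32 attributes whatever T is, so they are now cast to T before use; for T = float16 the effective clip bounds are the half-precision roundings of the attribute values, which is worth keeping in mind when checking fp16 outputs against a float32 reference. A small numpy sketch of that rounding (values illustrative):

import numpy as np

attr_min = np.float32(0.3)      # the attribute as stored on the op
eff_min = np.float16(attr_min)  # the bound an fp16 kernel compares against
print(attr_min, eff_min)        # 0.3 vs 0.2998046875

x = np.float16([0.299, 0.3, 0.95])
print(np.clip(x, eff_min, np.float16(0.7)))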
26 changes: 21 additions & 5 deletions python/paddle/fluid/tests/unittests/test_clip_op.py
@@ -43,7 +43,7 @@ def setUp(self):
         else:
             max_v = self.attrs['max']
 
-        input = np.random.random(self.shape).astype("float32")
+        input = np.random.random(self.shape).astype(self.dtype)
         input[np.abs(input - min_v) < self.max_relative_error] = 0.5
         input[np.abs(input - max_v) < self.max_relative_error] = 0.5
         self.inputs['X'] = input
@@ -60,50 +60,66 @@ def test_check_grad_normal(self):
         paddle.disable_static()
 
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 10, 10)
         self.max = 0.8
         self.min = 0.3
-        self.inputs['Max'] = np.array([0.8]).astype('float32')
-        self.inputs['Min'] = np.array([0.1]).astype('float32')
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.1]).astype(self.dtype)
 
 
 class TestCase1(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (8, 16, 8)
         self.max = 0.7
         self.min = 0.0
 
 
 class TestCase2(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (8, 16)
         self.max = 1.0
         self.min = 0.0
 
 
 class TestCase3(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 16)
         self.max = 0.7
         self.min = 0.2
 
 
 class TestCase4(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 8)
         self.max = 0.7
         self.min = 0.2
-        self.inputs['Max'] = np.array([0.8]).astype('float32')
-        self.inputs['Min'] = np.array([0.3]).astype('float32')
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
 
 
 class TestCase5(TestClipOp):
     def initTestCase(self):
+        self.dtype = np.float32
         self.shape = (4, 8, 16)
         self.max = 0.5
         self.min = 0.5
 
 
+class TestCase6(TestClipOp):
+    def initTestCase(self):
+        self.dtype = np.float16
+        self.shape = (4, 8, 8)
+        self.max = 0.7
+        self.min = 0.2
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
+
+
 class TestClipOpError(unittest.TestCase):
     def test_errors(self):
         paddle.enable_static()
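TestCase6 reuses the OpTest checks with dtype switched to np.float16, so the same forward and gradient verification now covers the new CUDA kernels. An equivalent hand check, sketched assuming a CUDA device is available (tolerances chosen loosely for half precision):

import numpy as np
import paddle

paddle.set_device('gpu')              # the fp16 clip kernels are CUDA-only
x32 = np.random.random((4, 8, 8)).astype('float32')
ref = np.clip(x32, 0.3, 0.8)          # float32 reference result
out = paddle.clip(paddle.to_tensor(x32.astype('float16')), min=0.3, max=0.8)
np.testing.assert_allclose(out.numpy().astype('float32'), ref,
                           rtol=1e-3, atol=1e-3)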
