huggingface · amyeroberts · Aug 17, 2022 · Aug 5, 2022 · Aug 5, 2022 · Aug 5, 2022
diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py
@@ -131,6 +131,13 @@ def convert_rgb(self, image):
 
         return image.convert("RGB")
 
+    def rescale(self, image: np.ndarray, scale: Union[float, int]) -> np.ndarray:
+        """
+        Rescale a numpy image by multiplying its values by `scale` (e.g. `1 / 255` maps 0-255 values to 0-1).
+        """
+        self._ensure_format_supported(image)
+        return image * scale
+
     def to_numpy_array(self, image, rescale=None, channel_first=True):
         """
         Converts `image` to a numpy array. Optionally rescales it and puts the channel dimension as the first
@@ -153,11 +160,10 @@ def to_numpy_array(self, image, rescale=None, channel_first=True):
         if is_torch_tensor(image):
             image = image.numpy()
 
-        if rescale is None:
-            rescale = isinstance(image.flat[0], np.integer)
+        rescale = isinstance(image.flat[0], np.integer) if rescale is None else rescale
 
         if rescale:
-            image = image.astype(np.float32) / 255.0
+            image = self.rescale(image.astype(np.float32), 1 / 255.0)
 
         if channel_first and image.ndim == 3:
             image = image.transpose(2, 0, 1)
@@ -184,7 +190,7 @@ def expand_dims(self, image):
             image = np.expand_dims(image, axis=0)
         return image
 
-    def normalize(self, image, mean, std):
+    def normalize(self, image, mean, std, rescale=False):
         """
         Normalizes `image` with `mean` and `std`. Note that this will trigger a conversion of `image` to a NumPy array
         if it's a PIL Image.
@@ -196,11 +202,21 @@ def normalize(self, image, mean, std):
                 The mean (per channel) to use for normalization.
             std (`List[float]` or `np.ndarray` or `torch.Tensor`):
                 The standard deviation (per channel) to use for normalization.
+            rescale (`bool`, *optional*, defaults to `False`):
+                Whether or not to rescale the image values by `1 / 255` (e.g. from the 0-255 range to 0-1). PIL
+                images are always rescaled, regardless of this flag.
         """
         self._ensure_format_supported(image)
 
         if isinstance(image, PIL.Image.Image):
-            image = self.to_numpy_array(image)
+            image = self.to_numpy_array(image, rescale=True)
+        # PIL images are always rescaled during the conversion above. NumPy arrays and torch
+        # tensors are only rescaled when the caller explicitly passes `rescale=True`.
+        elif rescale:
+            if isinstance(image, np.ndarray):
+                image = self.rescale(image.astype(np.float32), 1 / 255.0)
+            elif is_torch_tensor(image):
+                image = self.rescale(image.float(), 1 / 255.0)
 
         if isinstance(image, np.ndarray):
             if not isinstance(mean, np.ndarray):

diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
@@ -58,13 +58,13 @@ def test_conversion_image_to_array(self):
         array3 = feature_extractor.to_numpy_array(image, rescale=False)
         self.assertTrue(array3.dtype, np.uint8)
         self.assertEqual(array3.shape, (3, 16, 32))
-        self.assertTrue(np.array_equal(array1, array3.astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array1, array3.astype(np.float32) * (1 / 255.0)))
 
         # Conversion with no rescale and not channel first
         array4 = feature_extractor.to_numpy_array(image, rescale=False, channel_first=False)
         self.assertTrue(array4.dtype, np.uint8)
         self.assertEqual(array4.shape, (16, 32, 3))
-        self.assertTrue(np.array_equal(array2, array4.astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array2, array4.astype(np.float32) * (1 / 255.0)))
 
     def test_conversion_array_to_array(self):
         feature_extractor = ImageFeatureExtractionMixin()
@@ -74,13 +74,13 @@ def test_conversion_array_to_array(self):
         array1 = feature_extractor.to_numpy_array(array)
         self.assertTrue(array1.dtype, np.float32)
         self.assertEqual(array1.shape, (3, 16, 32))
-        self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)))
 
         # Same with no permute
         array2 = feature_extractor.to_numpy_array(array, channel_first=False)
         self.assertTrue(array2.dtype, np.float32)
         self.assertEqual(array2.shape, (16, 32, 3))
-        self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array2, array.astype(np.float32) * (1 / 255.0)))
 
         # Force rescale to False
         array3 = feature_extractor.to_numpy_array(array, rescale=False)
@@ -110,13 +110,13 @@ def test_conversion_torch_to_array(self):
         array1 = feature_extractor.to_numpy_array(array)
         self.assertTrue(array1.dtype, np.float32)
         self.assertEqual(array1.shape, (3, 16, 32))
-        self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array1, array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)))
 
         # Same with no permute
         array2 = feature_extractor.to_numpy_array(array, channel_first=False)
         self.assertTrue(array2.dtype, np.float32)
         self.assertEqual(array2.shape, (16, 32, 3))
-        self.assertTrue(np.array_equal(array2, array.astype(np.float32) / 255.0))
+        self.assertTrue(np.array_equal(array2, array.astype(np.float32) * (1 / 255.0)))
 
         # Force rescale to False
         array3 = feature_extractor.to_numpy_array(array, rescale=False)
@@ -160,7 +160,7 @@ def test_conversion_array_to_image(self):
         self.assertTrue(np.array_equal(np.array(image2), array))
 
         # If the array has floating type, it's rescaled by default.
-        image3 = feature_extractor.to_pil_image(array.astype(np.float32) / 255.0)
+        image3 = feature_extractor.to_pil_image(array.astype(np.float32) * (1 / 255.0))
         self.assertTrue(isinstance(image3, PIL.Image.Image))
         self.assertTrue(np.array_equal(np.array(image3), array))
 
@@ -170,7 +170,7 @@ def test_conversion_array_to_image(self):
         self.assertTrue(np.array_equal(np.array(image4), array))
 
         # And with floats + channel first.
-        image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) / 255.0)
+        image5 = feature_extractor.to_pil_image(array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0))
         self.assertTrue(isinstance(image5, PIL.Image.Image))
         self.assertTrue(np.array_equal(np.array(image5), array))
 
@@ -201,7 +201,7 @@ def test_conversion_tensor_to_image(self):
         self.assertTrue(np.array_equal(np.array(image4), array))
 
         # And with floats + channel first.
-        image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() / 255.0)
+        image5 = feature_extractor.to_pil_image(tensor.permute(2, 0, 1).float() * (1 / 255.0))
         self.assertTrue(isinstance(image5, PIL.Image.Image))
         self.assertTrue(np.array_equal(np.array(image5), array))
 
@@ -316,7 +316,7 @@ def test_normalize_image(self):
         self.assertEqual(normalized_image.shape, (3, 16, 32))
 
         # During the conversion rescale and channel first will be applied.
-        expected = array.transpose(2, 0, 1).astype(np.float32) / 255.0
+        expected = array.transpose(2, 0, 1).astype(np.float32) * (1 / 255.0)
         np_mean = np.array(mean).astype(np.float32)[:, None, None]
         np_std = np.array(std).astype(np.float32)[:, None, None]
         expected = (expected - np_mean) / np_std