103 changes: 102 additions & 1 deletion src/transformers/feature_extraction_utils.py
@@ -70,9 +70,15 @@ class BatchFeature(UserDict):
initialization.
"""

def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
def __init__(
self,
data: Optional[Dict[str, Any]] = None,
tensor_type: Union[None, str, TensorType] = None,
float_precision: Optional[str] = None,
):
super().__init__(data)
self.convert_to_tensors(tensor_type=tensor_type)
self.cast_to_dtype(tensor_type=tensor_type, float_precision=float_precision)

def __getitem__(self, item: str) -> Union[Any]:
"""
@@ -109,6 +115,101 @@ def values(self):
def items(self):
return self.data.items()

def cast_to_dtype(
self, tensor_type: Optional[Union[str, TensorType]] = None, float_precision: Optional[str] = None
):
"""
Optionally cast the input tensors (floating-point tensors only) to the desired precision.

Args:
tensor_type (`str` or [`~utils.TensorType`], *optional*):
The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
`None`, no modification is done.
float_precision (`str`, *optional*):
The output floating-point precision. One of `"float16"`, `"float32"`, `"double"` or `"bfloat16"`. If `None`, no cast is performed.
"""
if (float_precision is None) or (tensor_type is None):
return self

# Convert to TensorType
tensor_type = TensorType(tensor_type)

# Get a function reference for the correct framework
if tensor_type == TensorType.TENSORFLOW:
if not is_tf_available():
raise ImportError(
"Unable to convert output to TensorFlow tensors format, TensorFlow is not installed."
)
import tensorflow as tf

target_framework = tf
cast_fun = tf.cast

def is_floating(x):
return x.dtype in (tf.float16, tf.float32, tf.double, tf.bfloat16)

is_tensor = tf.is_tensor

elif tensor_type == TensorType.PYTORCH:
if not is_torch_available():
raise ImportError("Unable to convert output to PyTorch tensors format, PyTorch is not installed.")
import torch

target_framework = torch

def cast_fun(x, dtype):
return x.to(dtype=dtype)

def is_floating(x):
return x.dtype in (torch.float16, torch.float32, torch.double, torch.bfloat16)

is_tensor = torch.is_tensor

# Jax tensors
elif tensor_type == TensorType.JAX:
if not is_flax_available():
raise ImportError("Unable to convert output to JAX tensors format, JAX is not installed.")
import jax.numpy as jnp # noqa: F811

target_framework = jnp

def cast_fun(x, dtype):
return x.astype(dtype=dtype)

def is_floating(x):
return x.dtype in (jnp.float16, jnp.float32, jnp.double, jnp.bfloat16)

is_tensor = is_jax_tensor
# np arrays
else:
target_framework = np

def cast_fun(x, dtype):
return x.astype(dtype=dtype)

def is_floating(x):
return x.dtype in (np.half, np.single, np.double, np.longdouble)

is_tensor = is_numpy_array

if hasattr(target_framework, float_precision):
target_dtype = getattr(target_framework, float_precision)
else:
raise ValueError(
f"Failed to import the `dtype` {target_dtype} from the framework {target_framework} - please use a"
" supported `dtype` for your targeted framework.",
)

# Cast the floating-point tensors in the batch to the target dtype
for key, value in self.items():
# sanity check: only cast values that are actually tensors
if is_tensor(value):
if is_floating(value):
Review thread on the lines above:

Collaborator:
I am not very comfortable calling these `is_tensor` and `is_floating` without checking that `value` actually comes from `target_framework`. For example, what about `tensor_type` being `pt` while `value` is a TF tensor?

Contributor (author):
Thanks for the feedback! I think this cannot happen, since that check is already done in `convert_to_tensors`, which is called right before.

Collaborator:
I agree with you @younesbelkada. However, this method is added as a public method, so the concern remains (even though I doubt any user will call it directly). If it were prefixed with `_`, I wouldn't complain at all :-)

Let @sgugger review and give us his opinion on whether we should make any effort on such things.

Contributor (author):
Ah, I see now! Yes, then it makes sense to prefix it with `_` 💪

Collaborator:
My opinion has been stated above. I don't think any of this is useful, as Flax and TensorFlow deal with dtypes differently; there should only be a slight adaptation of the `to` method.
self[key] = cast_fun(value, target_dtype)

return self
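To make the last review comment above concrete, here is a minimal, hypothetical sketch of the alternative it suggests: adapting a `to`-style method rather than adding a separate public cast helper. The helper name `cast_batch_to` and the PyTorch-only scope are assumptions for illustration, not part of this PR:

import torch

def cast_batch_to(batch_data: dict, *args, **kwargs) -> dict:
    # Sketch: forward dtype/device arguments to torch.Tensor.to, but only cast
    # floating-point tensors so integer fields (e.g. attention masks) keep their dtype.
    device = kwargs.get("device")
    new_data = {}
    for key, value in batch_data.items():
        if torch.is_tensor(value) and torch.is_floating_point(value):
            new_data[key] = value.to(*args, **kwargs)
        elif torch.is_tensor(value) and device is not None:
            # non-floating tensors only follow a device move, never a dtype cast
            new_data[key] = value.to(device=device)
        else:
            new_data[key] = value
    return new_data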

def convert_to_tensors(self, tensor_type: Optional[Union[str, TensorType]] = None):
"""
Convert the inner content to tensors.
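For reference, a short usage sketch of the behavior this diff proposes for `BatchFeature`; the values are illustrative and the expected dtype is inferred from the added `cast_to_dtype` code above (PyTorch assumed installed):

import numpy as np
from transformers import BatchFeature

# With the proposed `float_precision` argument, floating-point entries are cast
# right after tensor conversion; non-floating entries are left untouched.
feat = BatchFeature(
    data={"pixel_values": np.random.rand(1, 3, 224, 224).astype(np.float32)},
    tensor_type="pt",
    float_precision="float16",
)
print(feat["pixel_values"].dtype)  # expected: torch.float16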
5 changes: 4 additions & 1 deletion src/transformers/models/vit/image_processing_vit.py
@@ -193,6 +193,7 @@ def preprocess(
image_std: Optional[Union[float, List[float]]] = None,
return_tensors: Optional[Union[str, TensorType]] = None,
data_format: Union[str, ChannelDimension] = ChannelDimension.FIRST,
float_precision: Optional[str] = None,
**kwargs,
):
"""
@@ -231,6 +232,8 @@ def preprocess(
- `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
- `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
- Unset: Use the channel dimension format of the input image.
float_precision (`str`, *optional*):
The output floating-point precision. One of `"float16"`, `"float32"`, `"double"` or `"bfloat16"`. If `None`, no cast is performed.
"""
do_resize = do_resize if do_resize is not None else self.do_resize
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
@@ -273,4 +276,4 @@ def preprocess(
images = [to_channel_dimension_format(image, data_format) for image in images]

data = {"pixel_values": images}
return BatchFeature(data=data, tensor_type=return_tensors)
return BatchFeature(data=data, tensor_type=return_tensors, float_precision=float_precision)
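A corresponding sketch of calling the ViT image processor with the new argument; the checkpoint name is only an example, and `preprocess` is called directly so the routing of the new keyword is explicit:

import numpy as np
from PIL import Image
from transformers import ViTImageProcessor

# Example checkpoint; any ViT image-processor config would do.
processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
image = Image.fromarray((np.random.rand(224, 224, 3) * 255).astype(np.uint8))

# `float_precision` is the argument added to preprocess() in this diff.
inputs = processor.preprocess(image, return_tensors="pt", float_precision="float16")
print(inputs["pixel_values"].dtype)  # expected: torch.float16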