diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index a86938e1ffaa..d76ee572814e 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -96,6 +96,17 @@ def is_batched(img): return False +def is_scaled_image(image: np.ndarray) -> bool: + """ + Checks to see whether the pixel values have already been rescaled to [0, 1]. + """ + if image.dtype == np.uint8: + return False + + # It's possible the image has pixel values in [0, 255] but is of floating type + return np.min(image) >= 0 and np.max(image) <= 1 + + def make_list_of_images(images, expected_ndims: int = 3) -> List[ImageInput]: """ Ensure that the input is a list of images. If the input is a single image, it is converted to a list of length 1. diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index 040cca8984fe..930934bbefd4 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -28,6 +28,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -236,6 +237,11 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -328,7 +334,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index 938a2b71fd8c..7b7836ea79dc 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -32,6 +32,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -184,7 +185,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -270,6 +272,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index 0f6c0a9e7876..9cee3faee324 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -176,7 +177,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -253,6 +255,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 899fe0628561..0a2a289b741c 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -29,6 +29,7 @@ get_image_size, infer_channel_dimension_format, is_batched, + is_scaled_image, to_numpy_array, valid_images, ) @@ -387,7 +388,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -469,6 +471,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if do_resize: images = [ self.resize( diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 32b90eb39f4e..e6cbddb8cb0f 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -32,6 +32,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -182,7 +183,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. 
If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -268,6 +270,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index 7ee5cd65d22e..47140abc999f 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -32,6 +32,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -183,7 +184,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -269,6 +271,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index 9cf2f65c9cb0..c2b28cd57d9b 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -42,6 +42,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_coco_detection_annotations, @@ -1126,7 +1127,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): List of annotations associated with the image or batch of images. 
If annotation is for object detection, the annotations should be a dictionary with the following keys: @@ -1259,6 +1261,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 408766673e4a..62fb1bc1e722 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -32,6 +32,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -203,7 +204,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -280,6 +282,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 059e08e6079e..ae35a07e43d8 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -42,6 +42,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_coco_detection_annotations, @@ -1124,7 +1125,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: @@ -1257,6 +1259,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. 
If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py index fc92a4d25774..c10c44ba91e4 100644 --- a/src/transformers/models/deit/image_processing_deit.py +++ b/src/transformers/models/deit/image_processing_deit.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -180,7 +181,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -258,6 +260,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deta/image_processing_deta.py b/src/transformers/models/deta/image_processing_deta.py index 22e73d032305..568990f536c8 100644 --- a/src/transformers/models/deta/image_processing_deta.py +++ b/src/transformers/models/deta/image_processing_deta.py @@ -40,6 +40,7 @@ get_image_size, infer_channel_dimension_format, is_batched, + is_scaled_image, to_numpy_array, valid_coco_detection_annotations, valid_coco_panoptic_annotations, @@ -796,7 +797,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`List[Dict]` or `List[List[Dict]]`, *optional*): List of annotations associated with the image or batch of images. If annotionation is for object detection, the annotations should be a dictionary with the following keys: @@ -921,6 +923,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index 1f062102a309..816fad102b5e 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -41,6 +41,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_coco_detection_annotations, @@ -1096,7 +1097,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: @@ -1229,6 +1231,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index a1cd1c084a1c..72e192e0724d 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -33,6 +33,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -319,7 +320,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -407,6 +409,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 31092259e7d3..3c48cdaf7811 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -29,6 +29,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, is_torch_available, is_torch_tensor, make_list_of_images, @@ -230,7 +231,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -305,6 +307,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientformer/image_processing_efficientformer.py b/src/transformers/models/efficientformer/image_processing_efficientformer.py index 8e1b81c28461..be8477678c5f 100644 --- a/src/transformers/models/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/efficientformer/image_processing_efficientformer.py @@ -32,6 +32,7 @@ PILImageResampling, infer_channel_dimension_format, is_batched, + is_scaled_image, to_numpy_array, valid_images, ) @@ -179,7 +180,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -256,6 +258,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 0957f60ecaeb..4661618ed52d 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -231,7 +232,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. 
+ Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -315,6 +317,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index b1681e9d9f8a..b098b7c634dd 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -30,6 +30,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -414,6 +415,12 @@ def _preprocess_image( # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(image) @@ -481,7 +488,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): diff --git a/src/transformers/models/glpn/image_processing_glpn.py b/src/transformers/models/glpn/image_processing_glpn.py index a0c341e47629..15e30dcbce4e 100644 --- a/src/transformers/models/glpn/image_processing_glpn.py +++ b/src/transformers/models/glpn/image_processing_glpn.py @@ -26,6 +26,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -137,7 +138,8 @@ def preprocess( Args: images (`PIL.Image.Image` or `TensorType` or `List[np.ndarray]` or `List[TensorType]`): - The image or images to preprocess. + Images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the input such that the (height, width) dimensions are a multiple of `size_divisor`. size_divisor (`int`, *optional*, defaults to `self.size_divisor`): @@ -182,6 +184,12 @@ def preprocess( # All transformations expect numpy arrays. 
images = [to_numpy_array(img) for img in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index c75a0e890f40..2d317226b759 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -25,6 +25,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -190,7 +191,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_normalize=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -250,6 +252,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_normalize: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If that is intentional, " + "set `do_normalize=False` and make sure the pixel values are between [-1, 1]." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index 7eaa8b8373cb..26a5c7a16418 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -236,7 +237,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -314,6 +316,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index b00a3f42eb2a..b43f24d51a90 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -31,6 +31,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -192,7 +193,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -273,6 +275,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 90917d12ec29..80264af24e50 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -36,6 +36,7 @@ get_image_size, infer_channel_dimension_format, is_batched, + is_scaled_image, to_numpy_array, valid_images, ) @@ -606,6 +607,11 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index bd0a248e11d9..30a043518ea5 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -35,6 +35,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -613,6 +614,11 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
+ ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index e703ad538fb7..c9b015c5c01f 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -31,6 +31,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -176,7 +177,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -254,6 +256,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index 26319cf1c4d8..9b015c88bf1d 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -31,6 +31,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -180,7 +181,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -258,6 +260,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index f5208812f1a1..6fab1491ad30 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -30,6 +30,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -189,7 +190,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -259,6 +261,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index 88250996a231..2e66efe61bc8 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -36,6 +36,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -574,6 +575,11 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( diff --git a/src/transformers/models/owlvit/image_processing_owlvit.py b/src/transformers/models/owlvit/image_processing_owlvit.py index 9c32daa23ff9..584e575603bd 100644 --- a/src/transformers/models/owlvit/image_processing_owlvit.py +++ b/src/transformers/models/owlvit/image_processing_owlvit.py @@ -34,6 +34,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -288,7 +289,8 @@ def preprocess( Args: images (`ImageInput`): - The image or batch of images to be prepared. + The image or batch of images to be prepared. Expects a single or batch of images with pixel values + ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether or not to resize the input. If `True`, will resize the input to the size specified by `size`. 
size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -368,6 +370,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 06544ef0de10..272cf32fa5eb 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -28,6 +28,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -228,7 +229,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`): Whether to center crop the image to `crop_size`. crop_size (`Dict[str, int]`, *optional*, defaults to `self.crop_size`): @@ -304,6 +306,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/pix2struct/image_processing_pix2struct.py b/src/transformers/models/pix2struct/image_processing_pix2struct.py index 3833923455d9..ba9cc95fcb0c 100644 --- a/src/transformers/models/pix2struct/image_processing_pix2struct.py +++ b/src/transformers/models/pix2struct/image_processing_pix2struct.py @@ -382,7 +382,7 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images. header_text (`Union[List[str], str]`, *optional*): Text to render as a header. Only has an effect if `image_processor.is_vqa` is `True`. do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`): diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index afa8b1925e9f..ca2997dacf13 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -31,6 +31,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -231,7 +232,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. 
do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -311,6 +313,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index 7a15407ae207..d5fdbddf6bee 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -164,7 +165,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -230,6 +232,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index d7e0976b9f4e..d98e81649088 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -30,6 +30,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -259,7 +260,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -344,6 +346,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 177889955614..fd48b53f2b96 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -28,6 +28,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -232,6 +233,11 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -319,7 +325,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. segmentation_maps (`ImageInput`, *optional*): Segmentation map to preprocess. do_resize (`bool`, *optional*, defaults to `self.do_resize`): diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr.py b/src/transformers/models/swin2sr/image_processing_swin2sr.py index 0944a3c67894..95eafb3d01d9 100644 --- a/src/transformers/models/swin2sr/image_processing_swin2sr.py +++ b/src/transformers/models/swin2sr/image_processing_swin2sr.py @@ -24,6 +24,7 @@ ChannelDimension, ImageInput, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -122,7 +123,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_rescale (`bool`, *optional*, defaults to `self.do_rescale`): Whether to rescale the image values between [0 - 1]. rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`): @@ -170,6 +172,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/tvlt/image_processing_tvlt.py index 2d0a0202a444..f5860b2c1dcc 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/tvlt/image_processing_tvlt.py @@ -30,6 +30,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, is_valid_image, to_numpy_array, valid_images, @@ -226,6 +227,12 @@ def _preprocess_image( # All transformations expect numpy arrays. 
image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -269,7 +276,8 @@ def preprocess( Args: videos (`ImageInput`): - Images or videos to preprocess. + Images or videos to preprocess. Expects a single or batch of frames with pixel values ranging from 0 to + 255. If passing in frames with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index cbe95eb89ab5..aa40dd3c7962 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -31,6 +31,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, is_valid_image, to_numpy_array, valid_images, @@ -205,6 +206,12 @@ def _preprocess_image( # All transformations expect numpy arrays. image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -246,7 +253,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index 934ce5be2e90..8dc0d156cba1 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -28,6 +28,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -357,7 +358,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -433,6 +435,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 60c316a0bbf7..1b7b3c5fd4c4 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -27,6 +27,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -163,7 +164,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -229,6 +231,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py index 77b0f629097a..b3a246a19075 100644 --- a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py @@ -32,6 +32,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -184,7 +185,8 @@ def preprocess( Args: images (`ImageInput`): - Image to preprocess. + Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If + passing in images with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): @@ -270,6 +272,12 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 1578586051de..4884180fe748 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -34,6 +34,7 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, + is_scaled_image, is_valid_image, to_numpy_array, valid_images, @@ -257,6 +258,12 @@ def _preprocess_image( # All transformations expect numpy arrays. 
image = to_numpy_array(image) + if is_scaled_image(image) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -299,7 +306,8 @@ def preprocess( Args: videos (`ImageInput`): - Video frames to preprocess. + Video frames to preprocess. Expects a single or batch of video frames with pixel values ranging from 0 + to 255. If passing in frames with pixel values between 0 and 1, set `do_rescale=False`. do_resize (`bool`, *optional*, defaults to `self.do_resize`): Whether to resize the image. size (`Dict[str, int]`, *optional*, defaults to `self.size`): diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index 9ff6586e0674..c51f5add3049 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -40,6 +40,7 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, + is_scaled_image, make_list_of_images, to_numpy_array, valid_coco_detection_annotations, @@ -1033,7 +1034,8 @@ def preprocess( Args: images (`ImageInput`): - Image or batch of images to preprocess. + Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging + from 0 to 255. If passing in images with pixel values between 0 and 1, set `do_rescale=False`. annotations (`AnnotationType` or `List[AnnotationType]`, *optional*): List of annotations associated with the image or batch of images. If annotation is for object detection, the annotations should be a dictionary with the following keys: @@ -1163,6 +1165,12 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] + if is_scaled_image(images[0]) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0])
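
For reference, below is a minimal, self-contained sketch of the heuristic this diff threads through every processor. The `is_scaled_image` body is copied from the `image_utils.py` hunk above; `preprocess_stub` is a hypothetical stand-in for the processors' `preprocess` methods, with a plain `print` in place of `logger.warning_once`, so the example runs outside of transformers.

import numpy as np


def is_scaled_image(image: np.ndarray) -> bool:
    """Checks to see whether the pixel values have already been rescaled to [0, 1]."""
    if image.dtype == np.uint8:
        # uint8 pixel data is by construction in [0, 255], so it is never "scaled".
        return False
    # A float image may still hold values in [0, 255]; only treat it as scaled
    # when every value falls inside [0, 1].
    return np.min(image) >= 0 and np.max(image) <= 1


def preprocess_stub(image: np.ndarray, do_rescale: bool = True) -> np.ndarray:
    # Hypothetical stand-in for the gate each preprocess() now applies before rescaling.
    if is_scaled_image(image) and do_rescale:
        print(
            "It looks like you are trying to rescale already rescaled images. If the input"
            " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
        )
    return image * (1 / 255) if do_rescale else image


raw = np.random.randint(0, 256, (3, 32, 32), dtype=np.uint8)
already_scaled = raw.astype(np.float32) / 255.0

preprocess_stub(raw)             # silent: uint8 input cannot be pre-scaled
preprocess_stub(already_scaled)  # warns: float input in [0, 1] would be divided by 255 twice

Note that the check is deliberately conservative: a float image whose values exceed 1 is still rescaled silently, since only the [0, 1] range is unambiguous evidence of prior scaling, and in the actual processors `logger.warning_once` emits the message at most once per process rather than on every call.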