diff --git a/src/transformers/models/maskformer/feature_extraction_maskformer.py b/src/transformers/models/maskformer/feature_extraction_maskformer.py index 9801a4545e90..3a5fd49d80fa 100644 --- a/src/transformers/models/maskformer/feature_extraction_maskformer.py +++ b/src/transformers/models/maskformer/feature_extraction_maskformer.py @@ -206,11 +206,9 @@ def __call__( instance id. To convert it to a binary mask of shape (`batch, num_labels, height, width`) we need a dictionary mapping instance ids to label ids to create a semantic segmentation map. - return_tensors (`str` or [`~utils.TensorType`], *optional*, defaults to `None`): - If set, will return a tensor of a particular framework. - - Acceptable values are: - - `'pt'`: Return PyTorch `torch.Tensor` objects. + return_tensors (`str` or [`~file_utils.TensorType`], *optional*): + If set, will return tensors instead of NumPy arrays. If set to `'pt'`, return PyTorch `torch.Tensor` + objects. Returns: [`BatchFeature`]: A [`BatchFeature`] with the following fields: @@ -285,19 +283,8 @@ def __call__( image=image, target=None, size=self.size, max_size=self.max_size )[0] - # if do_normalize=False, the casting to a numpy array won't happen, so we need to do it here - make_channel_first = True if isinstance(images[0], Image.Image) else images[0].shape[-1] in (1, 3) - images = [self.to_numpy_array(image, rescale=False, channel_first=make_channel_first) for image in images] - if segmentation_maps is not None: - segmentation_maps = [ - self.to_numpy_array(segmap, rescale=False, channel_first=True) for segmap in segmentation_maps - ] - if self.do_normalize: - images = [ - self.normalize(image=image, mean=self.image_mean, std=self.image_std, rescale=True) for image in images - ] - + images = [self.normalize(image=image, mean=self.image_mean, std=self.image_std) for image in images] # NOTE I will be always forced to pad them them since they have to be stacked in the batch dim encoded_inputs = self.encode_inputs( images, diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index a8d14502aff9..461add8c0355 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -18,7 +18,6 @@ import numpy as np -from parameterized import parameterized from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_torch_available, is_vision_available @@ -402,43 +401,3 @@ def test_post_process_panoptic_segmentation(self): self.assertEqual( el["segmentation"].shape, (self.feature_extract_tester.height, self.feature_extract_tester.width) ) - - @require_torch - @parameterized.expand( - [ - ("do_resize_True_do_normalize_True", True, True), - ("do_resize_True_do_normalize_False", True, False), - ("do_resize_True_do_normalize_True", True, True), - ("do_resize_True_do_normalize_False", True, False), - ("do_resize_False_do_normalize_True", False, True), - ("do_resize_False_do_normalize_False", False, False), - ("do_resize_False_do_normalize_True", False, True), - ("do_resize_False_do_normalize_False", False, False), - ] - ) - def test_call_flags(self, _, do_resize, do_normalize): - # Initialize feature_extractor - feature_extractor = self.feature_extraction_class(**self.feat_extract_dict) - feature_extractor.do_resize = do_resize - feature_extractor.do_normalize = do_normalize - # create random PIL images - image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False) - - all_image_shapes = [img.size[::-1] for img in image_inputs] - if do_resize: - all_image_shapes = [ - self.feature_extract_tester.get_expected_values([image], batched=False) for image in image_inputs - ] - - max_across_dim = [max(shape) for shape in zip(*all_image_shapes)] - expected_shape = ( - self.feature_extract_tester.batch_size, - self.feature_extract_tester.num_channels, - *max_across_dim, - ) - - pixel_values = feature_extractor(image_inputs, return_tensors="pt")["pixel_values"] - self.assertEqual(len(pixel_values), self.feature_extract_tester.batch_size) - - self.assertEqual(pixel_values.shape, expected_shape) - self.assertIsInstance(pixel_values, torch.Tensor)