From 285d1913d83b3043321eea4e3ced032087eda647 Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Thu, 15 Dec 2022 17:15:27 +0000
Subject: [PATCH 1/8] Add test_image_processing_common.py

---
 .../models/beit/test_image_processing_beit.py |   3 +-
 .../test_image_processing_conditional_detr.py |   3 +-
 .../test_image_processing_convnext.py         |   3 +-
 .../test_image_processing_deformable_detr.py  |   3 +-
 .../models/deit/test_image_processing_deit.py |   3 +-
 .../models/detr/test_image_processing_detr.py |   3 +-
 .../donut/test_image_processing_donut.py      |   3 +-
 tests/models/dpt/test_image_processing_dpt.py |   3 +-
 .../flava/test_image_processing_flava.py      |   3 +-
 .../models/glpn/test_image_processing_glpn.py |   3 +-
 .../test_image_processing_layoutlmv2.py       |   3 +-
 .../test_image_processing_layoutlmv3.py       |   3 +-
 .../levit/test_image_processing_levit.py      |   3 +-
 .../test_image_processing_maskformer.py       |   3 +-
 .../test_image_processing_mobilenet_v1.py     |   3 +-
 .../test_image_processing_mobilenet_v2.py     |   3 +-
 .../test_image_processing_mobilevit.py        |   3 +-
 .../owlvit/test_image_processing_owlvit.py    |   3 +-
 .../test_image_processing_poolformer.py       |   3 +-
 .../test_image_processing_segformer.py        |   3 +-
 .../test_image_processing_videomae.py         |   3 +-
 .../models/vilt/test_image_processing_vilt.py |   3 +-
 tests/models/vit/test_image_processing_vit.py |   3 +-
 .../yolos/test_image_processing_yolos.py      |   3 +-
 tests/test_feature_extraction_common.py       | 123 -------
 tests/test_image_processing_common.py         | 320 ++++++++++++++++++
 26 files changed, 368 insertions(+), 147 deletions(-)
 create mode 100644 tests/test_image_processing_common.py

diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py
index 545b4d79a9e8..b499f008457b 100644
--- a/tests/models/beit/test_image_processing_beit.py
+++ b/tests/models/beit/test_image_processing_beit.py
@@ -22,7 +22,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
index 4f3a6e21e0c9..b4e6f46d3e9e 100644
--- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py
+++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/convnext/test_image_processing_convnext.py b/tests/models/convnext/test_image_processing_convnext.py
index 9777c3df6d06..4fd62fc51d19 100644
--- a/tests/models/convnext/test_image_processing_convnext.py
+++ b/tests/models/convnext/test_image_processing_convnext.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
index aaafb7ff2f23..bc6368953949 100644
--- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py
+++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/deit/test_image_processing_deit.py b/tests/models/deit/test_image_processing_deit.py
index f684008ccc3f..db1e42f77109 100644
--- a/tests/models/deit/test_image_processing_deit.py
+++ b/tests/models/deit/test_image_processing_deit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py
index 6aafd62da4bd..253ffb7c2972 100644
--- a/tests/models/detr/test_image_processing_detr.py
+++ b/tests/models/detr/test_image_processing_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/donut/test_image_processing_donut.py b/tests/models/donut/test_image_processing_donut.py
index 4d0f88ac988b..550d166e460d 100644
--- a/tests/models/donut/test_image_processing_donut.py
+++ b/tests/models/donut/test_image_processing_donut.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import is_flaky, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py
index 594b1451a74e..0bbeb173e597 100644
--- a/tests/models/dpt/test_image_processing_dpt.py
+++ b/tests/models/dpt/test_image_processing_dpt.py
@@ -21,7 +21,8 @@
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.testing_utils import require_torch, require_vision
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/flava/test_image_processing_flava.py b/tests/models/flava/test_image_processing_flava.py
index ba6379e6b348..28718748200d 100644
--- a/tests/models/flava/test_image_processing_flava.py
+++ b/tests/models/flava/test_image_processing_flava.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/glpn/test_image_processing_glpn.py b/tests/models/glpn/test_image_processing_glpn.py
index 4e7f2bdf5c78..31e527761771 100644
--- a/tests/models/glpn/test_image_processing_glpn.py
+++ b/tests/models/glpn/test_image_processing_glpn.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index c26eaac16eba..4423d33376e4 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
index c8eb976bf584..829fc8d79dde 100644
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/levit/test_image_processing_levit.py b/tests/models/levit/test_image_processing_levit.py
index 2b1472d9b62a..76f3c66e1ade 100644
--- a/tests/models/levit/test_image_processing_levit.py
+++ b/tests/models/levit/test_image_processing_levit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py
index 2036d9f7d28f..f8ddf8c9dc03 100644
--- a/tests/models/maskformer/test_image_processing_maskformer.py
+++ b/tests/models/maskformer/test_image_processing_maskformer.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
index 270d38d5b818..383f91c554f8 100644
--- a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
+++ b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
index 3cb4eea21842..e207932e38e0 100644
--- a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
+++ b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py
index 468c4689e4dc..a22fc2c1d541 100644
--- a/tests/models/mobilevit/test_image_processing_mobilevit.py
+++ b/tests/models/mobilevit/test_image_processing_mobilevit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/owlvit/test_image_processing_owlvit.py b/tests/models/owlvit/test_image_processing_owlvit.py
index fe259b11696f..bf2cd8d666d2 100644
--- a/tests/models/owlvit/test_image_processing_owlvit.py
+++ b/tests/models/owlvit/test_image_processing_owlvit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/poolformer/test_image_processing_poolformer.py b/tests/models/poolformer/test_image_processing_poolformer.py
index b1fffe8a5a72..47e583a3211a 100644
--- a/tests/models/poolformer/test_image_processing_poolformer.py
+++ b/tests/models/poolformer/test_image_processing_poolformer.py
@@ -20,7 +20,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py
index 4257b27b814e..a104fc2f4835 100644
--- a/tests/models/segformer/test_image_processing_segformer.py
+++ b/tests/models/segformer/test_image_processing_segformer.py
@@ -22,7 +22,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/videomae/test_image_processing_videomae.py b/tests/models/videomae/test_image_processing_videomae.py
index f792a9be844e..025c39ef97f8 100644
--- a/tests/models/videomae/test_image_processing_videomae.py
+++ b/tests/models/videomae/test_image_processing_videomae.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_video_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_video_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/vilt/test_image_processing_vilt.py b/tests/models/vilt/test_image_processing_vilt.py
index 5816eacf8359..5d7be90a7475 100644
--- a/tests/models/vilt/test_image_processing_vilt.py
+++ b/tests/models/vilt/test_image_processing_vilt.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/vit/test_image_processing_vit.py b/tests/models/vit/test_image_processing_vit.py
index f4197425099d..a0db60887e40 100644
--- a/tests/models/vit/test_image_processing_vit.py
+++ b/tests/models/vit/test_image_processing_vit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index 2c1571d7f7de..4e22baa4d668 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
index fe8d02480644..98f143506bc8 100644
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -53,94 +53,6 @@
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 
-def prepare_image_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a list of PIL images, or a list of numpy arrays if one specifies numpify=True,
-    or a list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the images are of the same resolution or not.
-    """
-
-    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
-
-    image_inputs = []
-    for i in range(feature_extract_tester.batch_size):
-        if equal_resolution:
-            width = height = feature_extract_tester.max_resolution
-        else:
-            # To avoid getting image width/height 0
-            min_resolution = feature_extract_tester.min_resolution
-            if getattr(feature_extract_tester, "size_divisor", None):
-                # If `size_divisor` is defined, the image needs to have width/size >= `size_divisor`
-                min_resolution = max(feature_extract_tester.size_divisor, min_resolution)
-            width, height = np.random.choice(np.arange(min_resolution, feature_extract_tester.max_resolution), 2)
-        image_inputs.append(
-            np.random.randint(
-                255,
-                size=(
-                    feature_extract_tester.num_channels,
-                    width,
-                    height,
-                ),
-                dtype=np.uint8,
-            )
-        )
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        image_inputs = [Image.fromarray(np.moveaxis(image, 0, -1)) for image in image_inputs]
-
-    if torchify:
-        image_inputs = [torch.from_numpy(image) for image in image_inputs]
-
-    return image_inputs
-
-
-def prepare_video(feature_extract_tester, width=10, height=10, numpify=False, torchify=False):
-    """This function prepares a video as a list of PIL images/NumPy arrays/PyTorch tensors."""
-
-    video = []
-    for i in range(feature_extract_tester.num_frames):
-        video.append(np.random.randint(255, size=(feature_extract_tester.num_channels, width, height), dtype=np.uint8))
-
-    if not numpify and not torchify:
-        # PIL expects the channel dimension as last dimension
-        video = [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in video]
-
-    if torchify:
-        video = [torch.from_numpy(frame) for frame in video]
-
-    return video
-
-
-def prepare_video_inputs(feature_extract_tester, equal_resolution=False, numpify=False, torchify=False):
-    """This function prepares a batch of videos: a list of list of PIL images, or a list of list of numpy arrays if
-    one specifies numpify=True, or a list of list of PyTorch tensors if one specifies torchify=True.
-
-    One can specify whether the videos are of the same resolution or not.
-    """
-
-    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
-
-    video_inputs = []
-    for i in range(feature_extract_tester.batch_size):
-        if equal_resolution:
-            width = height = feature_extract_tester.max_resolution
-        else:
-            width, height = np.random.choice(
-                np.arange(feature_extract_tester.min_resolution, feature_extract_tester.max_resolution), 2
-            )
-            video = prepare_video(
-                feature_extract_tester=feature_extract_tester,
-                width=width,
-                height=height,
-                numpify=numpify,
-                torchify=torchify,
-            )
-        video_inputs.append(video)
-
-    return video_inputs
-
-
 class FeatureExtractionSavingTestMixin:
     test_cast_dtype = None
 
@@ -174,41 +86,6 @@ def test_init_without_params(self):
         feat_extract = self.feature_extraction_class()
         self.assertIsNotNone(feat_extract)
 
-    @require_torch
-    @require_vision
-    def test_cast_dtype_device(self):
-        if self.test_cast_dtype is not None:
-            # Initialize feature_extractor
-            feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
-
-            # create random PyTorch tensors
-            image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt")
-            # for layoutLM compatiblity
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float32)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt").to(torch.float16)
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
-
-            encoding = feature_extractor(image_inputs, return_tensors="pt").to("cpu", torch.bfloat16)
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.bfloat16)
-
-            with self.assertRaises(TypeError):
-                _ = feature_extractor(image_inputs, return_tensors="pt").to(torch.bfloat16, "cpu")
-
-            # Try with text + image feature
-            encoding = feature_extractor(image_inputs, return_tensors="pt")
-            encoding.update({"input_ids": torch.LongTensor([[1, 2, 3], [4, 5, 6]])})
-            encoding = encoding.to(torch.float16)
-
-            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
-            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
-            self.assertEqual(encoding.input_ids.dtype, torch.long)
-
 
 class FeatureExtractorUtilTester(unittest.TestCase):
     def test_cached_files_are_used_when_internet_is_down(self):
diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
new file mode 100644
index 000000000000..6485f2fc87a8
--- /dev/null
+++ b/tests/test_image_processing_common.py
@@ -0,0 +1,320 @@
+# coding=utf-8
+# Copyright 2021 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import json
+import os
+import sys
+import tempfile
+import unittest
+import unittest.mock as mock
+from pathlib import Path
+
+from huggingface_hub import HfFolder, delete_repo, set_access_token
+from requests.exceptions import HTTPError
+from transformers import ImageProcessor, ViTImageProcessor
+from transformers.testing_utils import (
+    TOKEN,
+    USER,
+    check_json_file_has_correct_format,
+    get_tests_dir,
+    is_staging_test,
+    require_torch,
+    require_vision,
+)
+from transformers.utils import is_torch_available, is_vision_available
+
+
+sys.path.append(str(Path(__file__).parent.parent / "utils"))
+
+from test_module.custom_image_processing import CustomImageProcessor  # noqa E402
+
+
+if is_torch_available():
+    import numpy as np
+    import torch
+
+if is_vision_available():
+    from PIL import Image
+
+
+SAMPLE_IMAGE_PROCESSING_CONFIG_DIR = get_tests_dir("fixtures")
+
+
+def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
+    """Build a batch of random images for image processor tests.
+
+    Every image is first generated as a uint8 array of shape `(num_channels, width, height)`.
+
+    Args:
+        image_processor_tester: object providing `batch_size`, `num_channels`, `min_resolution`,
+            `max_resolution` and, optionally, `size_divisor`.
+        equal_resolution: if True, every image uses `max_resolution` for both sides; otherwise each side is
+            drawn from `[low, max_resolution)` with `low` the larger of `min_resolution` and `size_divisor`.
+        numpify: if True, return the numpy arrays as they are.
+        torchify: if True, return PyTorch tensors. Cannot be combined with `numpify`.
+
+    Returns:
+        A list of PIL images by default, of numpy arrays if `numpify`, or of PyTorch tensors if `torchify`.
+    """
+    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
+
+    tester = image_processor_tester
+    arrays = []
+    for _ in range(tester.batch_size):
+        if not equal_resolution:
+            # To avoid getting image width/height 0
+            lower_bound = tester.min_resolution
+            if getattr(tester, "size_divisor", None):
+                # A defined `size_divisor` also acts as a lower bound for both sides
+                lower_bound = max(tester.size_divisor, lower_bound)
+            width, height = np.random.choice(np.arange(lower_bound, tester.max_resolution), 2)
+        else:
+            width = height = tester.max_resolution
+        shape = (tester.num_channels, width, height)
+        arrays.append(np.random.randint(255, size=shape, dtype=np.uint8))
+
+    if torchify:
+        return [torch.from_numpy(array) for array in arrays]
+    if numpify:
+        return arrays
+
+    # PIL expects the channel dimension as last dimension
+    return [Image.fromarray(np.moveaxis(array, 0, -1)) for array in arrays]
+
+
+def prepare_video(image_processor_tester, width=10, height=10, numpify=False, torchify=False):
+    """Create one random video: `num_frames` uint8 arrays of shape `(num_channels, width, height)`.
+
+    Frames are returned as PyTorch tensors if `torchify`, as numpy arrays if `numpify`, else as PIL images.
+    """
+    frames = [
+        np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8)
+        for _ in range(image_processor_tester.num_frames)
+    ]
+    if torchify:
+        return [torch.from_numpy(frame) for frame in frames]
+    if numpify:
+        return frames
+    # PIL expects the channel dimension as last dimension
+    return [Image.fromarray(np.moveaxis(frame, 0, -1)) for frame in frames]
+
+
+def prepare_video_inputs(image_processor_tester, equal_resolution=False, numpify=False, torchify=False):
+    """Prepare a batch of `batch_size` videos, each being the list of frames returned by `prepare_video`:
+    PIL images by default, numpy arrays if numpify=True, or PyTorch tensors if torchify=True (not both).
+
+    With equal_resolution=True every video uses `max_resolution` for width and height; otherwise both are random.
+    """
+
+    assert not (numpify and torchify), "You cannot specify both numpy and PyTorch tensors at the same time"
+
+    video_inputs = []
+    for _ in range(image_processor_tester.batch_size):
+        if equal_resolution:
+            width = height = image_processor_tester.max_resolution
+        else:
+            width, height = np.random.choice(
+                np.arange(image_processor_tester.min_resolution, image_processor_tester.max_resolution), 2
+            )
+        # Must run for both branches, otherwise `equal_resolution=True` leaves `video` undefined
+        video = prepare_video(
+            image_processor_tester=image_processor_tester,
+            width=width,
+            height=height,
+            numpify=numpify,
+            torchify=torchify,
+        )
+        video_inputs.append(video)
+    return video_inputs
+
+
+class ImageProcessingSavingTestMixin:
+    test_cast_dtype = None
+
+    def test_image_processor_to_json_string(self):
+        image_processor = self.image_processing_class(**self.image_processor_dict)
+        obj = json.loads(image_processor.to_json_string())
+        for key, value in self.image_processor_dict.items():
+            self.assertEqual(obj[key], value)
+
+    def test_image_processor_to_json_file(self):
+        image_processor_first = self.image_processing_class(**self.image_processor_dict)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            json_file_path = os.path.join(tmpdirname, "image_processor.json")
+            image_processor_first.to_json_file(json_file_path)
+            image_processor_second = self.image_processing_class.from_json_file(json_file_path)
+
+        self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict())
+
+    def test_image_processor_from_and_save_pretrained(self):
+        image_processor_first = self.image_processing_class(**self.image_processor_dict)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            saved_file = image_processor_first.save_pretrained(tmpdirname)[0]
+            check_json_file_has_correct_format(saved_file)
+            image_processor_second = self.image_processing_class.from_pretrained(tmpdirname)
+
+        self.assertEqual(image_processor_second.to_dict(), image_processor_first.to_dict())
+
+    def test_init_without_params(self):
+        image_processor = self.image_processing_class()
+        self.assertIsNotNone(image_processor)
+
+    @require_torch
+    @require_vision
+    def test_cast_dtype_device(self):
+        if self.test_cast_dtype is not None:
+            # Initialize image_processor
+            image_processor = self.image_processing_class(**self.image_processor_dict)
+
+            # create random PyTorch tensors
+            image_inputs = prepare_image_inputs(self.image_processor_tester, equal_resolution=False, torchify=True)
+
+            encoding = image_processor(image_inputs, return_tensors="pt")
+            # for layoutLM compatibility
+            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
+            self.assertEqual(encoding.pixel_values.dtype, torch.float32)
+
+            encoding = image_processor(image_inputs, return_tensors="pt").to(torch.float16)
+            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
+            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
+
+            encoding = image_processor(image_inputs, return_tensors="pt").to("cpu", torch.bfloat16)
+            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
+            self.assertEqual(encoding.pixel_values.dtype, torch.bfloat16)
+
+            with self.assertRaises(TypeError):
+                _ = image_processor(image_inputs, return_tensors="pt").to(torch.bfloat16, "cpu")
+
+            # Try with text + image feature
+            encoding = image_processor(image_inputs, return_tensors="pt")
+            encoding.update({"input_ids": torch.LongTensor([[1, 2, 3], [4, 5, 6]])})
+            encoding = encoding.to(torch.float16)
+
+            self.assertEqual(encoding.pixel_values.device, torch.device("cpu"))
+            self.assertEqual(encoding.pixel_values.dtype, torch.float16)
+            self.assertEqual(encoding.input_ids.dtype, torch.long)
+
+
+class ImageProcessorUtilTester(unittest.TestCase):
+    def test_cached_files_are_used_when_internet_is_down(self):
+        # A mock response for an HTTP head request to emulate server down
+        response_mock = mock.Mock()
+        response_mock.status_code = 500
+        response_mock.headers = {}
+        response_mock.raise_for_status.side_effect = HTTPError
+        response_mock.json.return_value = {}
+
+        # Download this model to make sure it's in the cache.
+        _ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit")
+        # Under the mock environment we get a 500 error when trying to reach the model.
+        with mock.patch("requests.request", return_value=response_mock) as mock_head:
+            _ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit")
+            # This checks that we did call the fake head request
+            mock_head.assert_called()
+
+    def test_legacy_load_from_url(self):
+        # This test is for deprecated behavior and can be removed in v5
+        _ = ViTImageProcessor.from_pretrained(
+            "https://huggingface.co/hf-internal-testing/tiny-random-vit/resolve/main/preprocessor_config.json"
+        )
+
+
+@is_staging_test
+class ImageProcessorPushToHubTester(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._token = TOKEN
+        set_access_token(TOKEN)
+        HfFolder.save_token(TOKEN)
+
+    @classmethod
+    def tearDownClass(cls):
+        try:
+            delete_repo(token=cls._token, repo_id="test-image-processor")
+        except HTTPError:
+            pass
+
+        try:
+            delete_repo(token=cls._token, repo_id="valid_org/test-image-processor-org")
+        except HTTPError:
+            pass
+
+        try:
+            delete_repo(token=cls._token, repo_id="test-dynamic-image-processor")
+        except HTTPError:
+            pass
+
+    def test_push_to_hub(self):
+        image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
+        image_processor.push_to_hub("test-image-processor", use_auth_token=self._token)
+
+        new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor")
+        for k, v in image_processor.__dict__.items():
+            self.assertEqual(v, getattr(new_image_processor, k))
+
+        # Reset repo
+        delete_repo(token=self._token, repo_id="test-image-processor")
+
+        # Push to hub via save_pretrained
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            image_processor.save_pretrained(
+                tmp_dir, repo_id="test-image-processor", push_to_hub=True, use_auth_token=self._token
+            )
+
+        new_image_processor = ViTImageProcessor.from_pretrained(f"{USER}/test-image-processor")
+        for k, v in image_processor.__dict__.items():
+            self.assertEqual(v, getattr(new_image_processor, k))
+
+    def test_push_to_hub_in_organization(self):
+        image_processor = ViTImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
+        image_processor.push_to_hub("valid_org/test-image-processor", use_auth_token=self._token)
+
+        new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor")
+        for k, v in image_processor.__dict__.items():
+            self.assertEqual(v, getattr(new_image_processor, k))
+
+        # Reset repo
+        delete_repo(token=self._token, repo_id="valid_org/test-image-processor")
+
+        # Push to hub via save_pretrained
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            image_processor.save_pretrained(
+                tmp_dir, repo_id="valid_org/test-image-processor-org", push_to_hub=True, use_auth_token=self._token
+            )
+
+        new_image_processor = ViTImageProcessor.from_pretrained("valid_org/test-image-processor-org")
+        for k, v in image_processor.__dict__.items():
+            self.assertEqual(v, getattr(new_image_processor, k))
+
+    def test_push_to_hub_dynamic_image_processor(self):
+        CustomImageProcessor.register_for_auto_class()
+        image_processor = CustomImageProcessor.from_pretrained(SAMPLE_IMAGE_PROCESSING_CONFIG_DIR)
+
+        image_processor.push_to_hub("test-dynamic-image-processor", use_auth_token=self._token)
+
+        # This has added the proper auto_map field to the config
+        self.assertDictEqual(
+            image_processor.auto_map,
+            {"ImageProcessor": "custom_image_processing.CustomImageProcessor"},
+        )
+
+        new_image_processor = ImageProcessor.from_pretrained(
+            f"{USER}/test-dynamic-image-processor", trust_remote_code=True
+        )
+        # Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module
+        self.assertEqual(new_image_processor.__class__.__name__, "CustomImageProcessor")

From 5d5c3dfaa67c0f303d190cda71b80fdddab54edf Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Thu, 15 Dec 2022 17:22:48 +0000
Subject: [PATCH 2/8] Fix typo

---
 tests/test_image_processing_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
index 6485f2fc87a8..35fbe83e695f 100644
--- a/tests/test_image_processing_common.py
+++ b/tests/test_image_processing_common.py
@@ -24,7 +24,7 @@
 
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
-from transformers import ImageProcessor, ViTImageProcessor
+from transformers import AutoImageProcessor, ViTImageProcessor
 from transformers.testing_utils import (
     TOKEN,
     USER,
@@ -313,7 +313,7 @@ def test_push_to_hub_dynamic_image_processor(self):
             {"ImageProcessor": "custom_image_processing.CustomImageProcessor"},
         )
 
-        new_image_processor = ImageProcessor.from_pretrained(
+        new_image_processor = AutoImageProcessor.from_pretrained(
             f"{USER}/test-dynamic-image-processor", trust_remote_code=True
         )
         # Can't make an isinstance check because the new_image_processor is from the CustomImageProcessor class of a dynamic module

From 7311ea0b19dd0fed7799be6f80913322dad15213 Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Thu, 19 Jan 2023 16:22:59 +0000
Subject: [PATCH 3/8] Update imports and test fetcher

---
 .../models/beit/test_image_processing_beit.py |  5 ++---
 .../models/blip/test_image_processing_blip.py |  6 +++---
 .../test_image_processing_chinese_clip.py     |  6 +++---
 .../models/clip/test_image_processing_clip.py |  6 +++---
 .../test_image_processing_conditional_detr.py |  5 ++---
 .../test_image_processing_convnext.py         |  5 ++---
 .../test_image_processing_deformable_detr.py  |  5 ++---
 .../models/deit/test_image_processing_deit.py |  5 ++---
 .../models/detr/test_image_processing_detr.py |  5 ++---
 .../donut/test_image_processing_donut.py      |  5 ++---
 tests/models/dpt/test_image_processing_dpt.py |  5 ++---
 .../flava/test_image_processing_flava.py      |  5 ++---
 .../models/glpn/test_image_processing_glpn.py |  5 ++---
 .../test_image_processing_imagegpt.py         |  4 ++--
 .../test_image_processing_layoutlmv2.py       |  5 ++---
 .../test_image_processing_layoutlmv3.py       |  5 ++---
 .../levit/test_image_processing_levit.py      |  5 ++---
 .../test_image_processing_maskformer.py       |  5 ++---
 .../test_image_processing_mobilenet_v1.py     |  5 ++---
 .../test_image_processing_mobilenet_v2.py     |  5 ++---
 .../test_image_processing_mobilevit.py        |  5 ++---
 .../test_image_processing_oneformer.py        |  4 ++--
 .../owlvit/test_image_processing_owlvit.py    |  5 ++---
 .../test_image_processing_poolformer.py       |  5 ++---
 .../test_image_processing_segformer.py        |  5 ++---
 .../swin2sr/test_image_processing_swin2sr.py  |  4 ++--
 .../test_image_processing_videomae.py         |  5 ++---
 .../models/vilt/test_image_processing_vilt.py |  5 ++---
 tests/models/vit/test_image_processing_vit.py |  5 ++---
 .../yolos/test_image_processing_yolos.py      |  5 ++---
 tests/test_feature_extraction_common.py       | 19 +------------------
 utils/tests_fetcher.py                        |  1 +
 32 files changed, 65 insertions(+), 105 deletions(-)

diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py
index b499f008457b..ad6b2ae72331 100644
--- a/tests/models/beit/test_image_processing_beit.py
+++ b/tests/models/beit/test_image_processing_beit.py
@@ -22,8 +22,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -105,7 +104,7 @@ def prepare_semantic_batch_inputs():
 
 @require_torch
 @require_vision
-class BeitFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class BeitFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BeitFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/blip/test_image_processing_blip.py b/tests/models/blip/test_image_processing_blip.py
index ea31038b14ab..1becb12a2243 100644
--- a/tests/models/blip/test_image_processing_blip.py
+++ b/tests/models/blip/test_image_processing_blip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import ImageProcessingSavingTestMixin
 
 
 if is_torch_available():
@@ -109,7 +109,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class BlipImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class BlipImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BlipImageProcessor if is_vision_available() else None
 
@@ -231,7 +231,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class BlipImageProcessingTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class BlipImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BlipImageProcessor if is_vision_available() else None
 
diff --git a/tests/models/chinese_clip/test_image_processing_chinese_clip.py b/tests/models/chinese_clip/test_image_processing_chinese_clip.py
index 616dfa3ffc7a..9b16f8525ff3 100644
--- a/tests/models/chinese_clip/test_image_processing_chinese_clip.py
+++ b/tests/models/chinese_clip/test_image_processing_chinese_clip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import ImageProcessingSavingTestMixin
 
 
 if is_torch_available():
@@ -113,7 +113,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class ChineseCLIPFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ChineseCLIPFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ChineseCLIPFeatureExtractor if is_vision_available() else None
 
@@ -246,7 +246,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class ChineseCLIPFeatureExtractionTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ChineseCLIPFeatureExtractionTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ChineseCLIPFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/clip/test_image_processing_clip.py b/tests/models/clip/test_image_processing_clip.py
index 8f29b63bbb55..0dd2d7e7646e 100644
--- a/tests/models/clip/test_image_processing_clip.py
+++ b/tests/models/clip/test_image_processing_clip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import ImageProcessingSavingTestMixin
 
 
 if is_torch_available():
@@ -113,7 +113,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class CLIPFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class CLIPFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = CLIPFeatureExtractor if is_vision_available() else None
 
@@ -246,7 +246,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class CLIPFeatureExtractionTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class CLIPFeatureExtractionTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = CLIPFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
index b4e6f46d3e9e..038e5d52e118 100644
--- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py
+++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
@@ -23,8 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -115,7 +114,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class ConditionalDetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ConditionalDetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ConditionalDetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/convnext/test_image_processing_convnext.py b/tests/models/convnext/test_image_processing_convnext.py
index 4fd62fc51d19..053189701210 100644
--- a/tests/models/convnext/test_image_processing_convnext.py
+++ b/tests/models/convnext/test_image_processing_convnext.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -77,7 +76,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ConvNextFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ConvNextFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ConvNextFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
index bc6368953949..f582d5553995 100644
--- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py
+++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
@@ -23,8 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -115,7 +114,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class DeformableDetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class DeformableDetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DeformableDetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/deit/test_image_processing_deit.py b/tests/models/deit/test_image_processing_deit.py
index db1e42f77109..18508df5d45c 100644
--- a/tests/models/deit/test_image_processing_deit.py
+++ b/tests/models/deit/test_image_processing_deit.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -82,7 +81,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DeiTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class DeiTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DeiTFeatureExtractor if is_vision_available() else None
     test_cast_dtype = True
diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py
index 253ffb7c2972..1da7ae101240 100644
--- a/tests/models/detr/test_image_processing_detr.py
+++ b/tests/models/detr/test_image_processing_detr.py
@@ -23,8 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -115,7 +114,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class DetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/donut/test_image_processing_donut.py b/tests/models/donut/test_image_processing_donut.py
index 550d166e460d..81dc4389a1f0 100644
--- a/tests/models/donut/test_image_processing_donut.py
+++ b/tests/models/donut/test_image_processing_donut.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import is_flaky, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -82,7 +81,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DonutFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class DonutFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DonutFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py
index 0bbeb173e597..e17abce74ae6 100644
--- a/tests/models/dpt/test_image_processing_dpt.py
+++ b/tests/models/dpt/test_image_processing_dpt.py
@@ -21,8 +21,7 @@
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.testing_utils import require_torch, require_vision
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -74,7 +73,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class DPTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DPTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/flava/test_image_processing_flava.py b/tests/models/flava/test_image_processing_flava.py
index 28718748200d..939520e780ba 100644
--- a/tests/models/flava/test_image_processing_flava.py
+++ b/tests/models/flava/test_image_processing_flava.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -160,7 +159,7 @@ def get_expected_codebook_image_size(self):
 
 @require_torch
 @require_vision
-class FlavaFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class FlavaFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = FlavaFeatureExtractor if is_vision_available() else None
     maxDiff = None
diff --git a/tests/models/glpn/test_image_processing_glpn.py b/tests/models/glpn/test_image_processing_glpn.py
index 31e527761771..bbbd54269fc9 100644
--- a/tests/models/glpn/test_image_processing_glpn.py
+++ b/tests/models/glpn/test_image_processing_glpn.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -67,7 +66,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class GLPNFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class GLPNFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = GLPNFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/imagegpt/test_image_processing_imagegpt.py b/tests/models/imagegpt/test_image_processing_imagegpt.py
index 465a6015a39a..2ef41cfc5df8 100644
--- a/tests/models/imagegpt/test_image_processing_imagegpt.py
+++ b/tests/models/imagegpt/test_image_processing_imagegpt.py
@@ -25,7 +25,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import ImageProcessingSavingTestMixin
 
 
 if is_torch_available():
@@ -78,7 +78,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ImageGPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ImageGPTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ImageGPTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index 4423d33376e4..112d40cca383 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -64,7 +63,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_pytesseract
-class LayoutLMv2FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class LayoutLMv2FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LayoutLMv2FeatureExtractor if is_pytesseract_available() else None
 
diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
index 829fc8d79dde..6d9f4fd58b6f 100644
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -64,7 +63,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_pytesseract
-class LayoutLMv3FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class LayoutLMv3FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LayoutLMv3FeatureExtractor if is_pytesseract_available() else None
 
diff --git a/tests/models/levit/test_image_processing_levit.py b/tests/models/levit/test_image_processing_levit.py
index 76f3c66e1ade..912bb9deb545 100644
--- a/tests/models/levit/test_image_processing_levit.py
+++ b/tests/models/levit/test_image_processing_levit.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -81,7 +80,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class LevitFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class LevitFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LevitFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py
index f8ddf8c9dc03..ccad43d4185a 100644
--- a/tests/models/maskformer/test_image_processing_maskformer.py
+++ b/tests/models/maskformer/test_image_processing_maskformer.py
@@ -23,8 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -131,7 +130,7 @@ def get_fake_maskformer_outputs(self):
 
 @require_torch
 @require_vision
-class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class MaskFormerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MaskFormerFeatureExtractor if (is_vision_available() and is_torch_available()) else None
 
diff --git a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
index 383f91c554f8..c0b6f2979405 100644
--- a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
+++ b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -72,7 +71,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileNetV1FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class MobileNetV1FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileNetV1FeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
index e207932e38e0..aa35b62383da 100644
--- a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
+++ b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -72,7 +71,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileNetV2FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class MobileNetV2FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileNetV2FeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py
index a22fc2c1d541..ce3d07a3ad23 100644
--- a/tests/models/mobilevit/test_image_processing_mobilevit.py
+++ b/tests/models/mobilevit/test_image_processing_mobilevit.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -75,7 +74,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class MobileViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py
index f34ae080cf96..8faa441b5184 100644
--- a/tests/models/oneformer/test_image_processing_oneformer.py
+++ b/tests/models/oneformer/test_image_processing_oneformer.py
@@ -23,7 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -155,7 +155,7 @@ def get_fake_oneformer_outputs(self):
 
 @require_torch
 @require_vision
-class OneFormerImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
     image_processing_class = OneFormerImageProcessor if (is_vision_available() and is_torch_available()) else None
     # only for test_feat_extracttion_common.test_feat_extract_to_json_string
     feature_extraction_class = image_processing_class
diff --git a/tests/models/owlvit/test_image_processing_owlvit.py b/tests/models/owlvit/test_image_processing_owlvit.py
index bf2cd8d666d2..77e3ebc52049 100644
--- a/tests/models/owlvit/test_image_processing_owlvit.py
+++ b/tests/models/owlvit/test_image_processing_owlvit.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -82,7 +81,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class OwlViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class OwlViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = OwlViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/poolformer/test_image_processing_poolformer.py b/tests/models/poolformer/test_image_processing_poolformer.py
index 47e583a3211a..7f999c9c13f3 100644
--- a/tests/models/poolformer/test_image_processing_poolformer.py
+++ b/tests/models/poolformer/test_image_processing_poolformer.py
@@ -20,8 +20,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -78,7 +77,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class PoolFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class PoolFormerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = PoolFormerFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py
index a104fc2f4835..de0c2d2ac203 100644
--- a/tests/models/segformer/test_image_processing_segformer.py
+++ b/tests/models/segformer/test_image_processing_segformer.py
@@ -22,8 +22,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -96,7 +95,7 @@ def prepare_semantic_batch_inputs():
 
 @require_torch
 @require_vision
-class SegformerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class SegformerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = SegformerFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/swin2sr/test_image_processing_swin2sr.py b/tests/models/swin2sr/test_image_processing_swin2sr.py
index 393a44ecface..488f55714ec2 100644
--- a/tests/models/swin2sr/test_image_processing_swin2sr.py
+++ b/tests/models/swin2sr/test_image_processing_swin2sr.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import ImageProcessingSavingTestMixin
 
 
 if is_torch_available():
@@ -100,7 +100,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class Swin2SRImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class Swin2SRImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = Swin2SRImageProcessor if is_vision_available() else None
 
diff --git a/tests/models/videomae/test_image_processing_videomae.py b/tests/models/videomae/test_image_processing_videomae.py
index 025c39ef97f8..98a60ac39e56 100644
--- a/tests/models/videomae/test_image_processing_videomae.py
+++ b/tests/models/videomae/test_image_processing_videomae.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_video_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_video_inputs
 
 
 if is_torch_available():
@@ -81,7 +80,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class VideoMAEFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class VideoMAEFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = VideoMAEFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/vilt/test_image_processing_vilt.py b/tests/models/vilt/test_image_processing_vilt.py
index 5d7be90a7475..2c75ddee06d9 100644
--- a/tests/models/vilt/test_image_processing_vilt.py
+++ b/tests/models/vilt/test_image_processing_vilt.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -117,7 +116,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class ViltFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ViltFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ViltFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/vit/test_image_processing_vit.py b/tests/models/vit/test_image_processing_vit.py
index a0db60887e40..f358e2a39e03 100644
--- a/tests/models/vit/test_image_processing_vit.py
+++ b/tests/models/vit/test_image_processing_vit.py
@@ -21,8 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -74,7 +73,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class ViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index 4e22baa4d668..2c82ac6ecbe7 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -23,8 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
-from ...test_image_processing_common import prepare_image_inputs
+from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -115,7 +114,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class YolosFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
+class YolosFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = YolosFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
index 98f143506bc8..5c60cf58ac25 100644
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -25,16 +25,7 @@
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import (
-    TOKEN,
-    USER,
-    check_json_file_has_correct_format,
-    get_tests_dir,
-    is_staging_test,
-    require_torch,
-    require_vision,
-)
-from transformers.utils import is_torch_available, is_vision_available
+from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
 
 
 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -42,14 +33,6 @@
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
 
 
-if is_torch_available():
-    import numpy as np
-    import torch
-
-if is_vision_available():
-    from PIL import Image
-
-
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 82501d98bc67..6c2d28a98263 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -354,6 +354,7 @@ def create_reverse_dependency_map():
     "feature_extraction_utils.py": "test_feature_extraction_common.py",
     "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
     "image_transforms.py": "test_image_transforms.py",
+    "image_processing_utils.py": ["test_image_processing_common.py", "utils/test_image_processing_utils.py"],
     "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py", "utils/test_generic.py"],
     "utils/hub.py": "utils/test_hub_utils.py",
     "modelcard.py": "utils/test_model_card.py",

From 7b936c9fd5509c0e0a83ce24ea1cdfb4eea6b2bd Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Thu, 19 Jan 2023 17:01:13 +0000
Subject: [PATCH 4/8] Revert but keep test fetcher update

---
 .../models/beit/test_image_processing_beit.py |  5 +++--
 .../models/blip/test_image_processing_blip.py |  6 +++---
 .../test_image_processing_chinese_clip.py     |  6 +++---
 .../models/clip/test_image_processing_clip.py |  6 +++---
 .../test_image_processing_conditional_detr.py |  5 +++--
 .../test_image_processing_convnext.py         |  5 +++--
 .../test_image_processing_deformable_detr.py  |  5 +++--
 .../models/deit/test_image_processing_deit.py |  5 +++--
 .../models/detr/test_image_processing_detr.py |  5 +++--
 .../donut/test_image_processing_donut.py      |  5 +++--
 tests/models/dpt/test_image_processing_dpt.py |  5 +++--
 .../flava/test_image_processing_flava.py      |  5 +++--
 .../models/glpn/test_image_processing_glpn.py |  5 +++--
 .../test_image_processing_imagegpt.py         |  4 ++--
 .../test_image_processing_layoutlmv2.py       |  5 +++--
 .../test_image_processing_layoutlmv3.py       |  5 +++--
 .../levit/test_image_processing_levit.py      |  5 +++--
 .../test_image_processing_maskformer.py       |  5 +++--
 .../test_image_processing_mobilenet_v1.py     |  5 +++--
 .../test_image_processing_mobilenet_v2.py     |  5 +++--
 .../test_image_processing_mobilevit.py        |  5 +++--
 .../test_image_processing_oneformer.py        |  4 ++--
 .../owlvit/test_image_processing_owlvit.py    |  5 +++--
 .../test_image_processing_poolformer.py       |  5 +++--
 .../test_image_processing_segformer.py        |  5 +++--
 .../swin2sr/test_image_processing_swin2sr.py  |  4 ++--
 .../test_image_processing_videomae.py         |  5 +++--
 .../models/vilt/test_image_processing_vilt.py |  5 +++--
 tests/models/vit/test_image_processing_vit.py |  5 +++--
 .../yolos/test_image_processing_yolos.py      |  5 +++--
 tests/test_feature_extraction_common.py       | 19 ++++++++++++++++++-
 utils/tests_fetcher.py                        |  2 +-
 32 files changed, 106 insertions(+), 65 deletions(-)

diff --git a/tests/models/beit/test_image_processing_beit.py b/tests/models/beit/test_image_processing_beit.py
index ad6b2ae72331..b499f008457b 100644
--- a/tests/models/beit/test_image_processing_beit.py
+++ b/tests/models/beit/test_image_processing_beit.py
@@ -22,7 +22,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -104,7 +105,7 @@ def prepare_semantic_batch_inputs():
 
 @require_torch
 @require_vision
-class BeitFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class BeitFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BeitFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/blip/test_image_processing_blip.py b/tests/models/blip/test_image_processing_blip.py
index 1becb12a2243..ea31038b14ab 100644
--- a/tests/models/blip/test_image_processing_blip.py
+++ b/tests/models/blip/test_image_processing_blip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
 
 
 if is_torch_available():
@@ -109,7 +109,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class BlipImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class BlipImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BlipImageProcessor if is_vision_available() else None
 
@@ -231,7 +231,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class BlipImageProcessingTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
+class BlipImageProcessingTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = BlipImageProcessor if is_vision_available() else None
 
diff --git a/tests/models/chinese_clip/test_image_processing_chinese_clip.py b/tests/models/chinese_clip/test_image_processing_chinese_clip.py
index 9b16f8525ff3..616dfa3ffc7a 100644
--- a/tests/models/chinese_clip/test_image_processing_chinese_clip.py
+++ b/tests/models/chinese_clip/test_image_processing_chinese_clip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
 
 
 if is_torch_available():
@@ -113,7 +113,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class ChineseCLIPFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ChineseCLIPFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ChineseCLIPFeatureExtractor if is_vision_available() else None
 
@@ -246,7 +246,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class ChineseCLIPFeatureExtractionTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ChineseCLIPFeatureExtractionTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ChineseCLIPFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/clip/test_image_processing_clip.py b/tests/models/clip/test_image_processing_clip.py
index 0dd2d7e7646e..8f29b63bbb55 100644
--- a/tests/models/clip/test_image_processing_clip.py
+++ b/tests/models/clip/test_image_processing_clip.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
 
 
 if is_torch_available():
@@ -113,7 +113,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class CLIPFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class CLIPFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = CLIPFeatureExtractor if is_vision_available() else None
 
@@ -246,7 +246,7 @@ def test_call_pytorch(self):
 
 @require_torch
 @require_vision
-class CLIPFeatureExtractionTestFourChannels(ImageProcessingSavingTestMixin, unittest.TestCase):
+class CLIPFeatureExtractionTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = CLIPFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/conditional_detr/test_image_processing_conditional_detr.py b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
index 038e5d52e118..b4e6f46d3e9e 100644
--- a/tests/models/conditional_detr/test_image_processing_conditional_detr.py
+++ b/tests/models/conditional_detr/test_image_processing_conditional_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -114,7 +115,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class ConditionalDetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ConditionalDetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ConditionalDetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/convnext/test_image_processing_convnext.py b/tests/models/convnext/test_image_processing_convnext.py
index 053189701210..4fd62fc51d19 100644
--- a/tests/models/convnext/test_image_processing_convnext.py
+++ b/tests/models/convnext/test_image_processing_convnext.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -76,7 +77,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ConvNextFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ConvNextFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ConvNextFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/deformable_detr/test_image_processing_deformable_detr.py b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
index f582d5553995..bc6368953949 100644
--- a/tests/models/deformable_detr/test_image_processing_deformable_detr.py
+++ b/tests/models/deformable_detr/test_image_processing_deformable_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -114,7 +115,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class DeformableDetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class DeformableDetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DeformableDetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/deit/test_image_processing_deit.py b/tests/models/deit/test_image_processing_deit.py
index 18508df5d45c..db1e42f77109 100644
--- a/tests/models/deit/test_image_processing_deit.py
+++ b/tests/models/deit/test_image_processing_deit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -81,7 +82,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DeiTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class DeiTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DeiTFeatureExtractor if is_vision_available() else None
     test_cast_dtype = True
diff --git a/tests/models/detr/test_image_processing_detr.py b/tests/models/detr/test_image_processing_detr.py
index 1da7ae101240..253ffb7c2972 100644
--- a/tests/models/detr/test_image_processing_detr.py
+++ b/tests/models/detr/test_image_processing_detr.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -114,7 +115,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class DetrFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DetrFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/donut/test_image_processing_donut.py b/tests/models/donut/test_image_processing_donut.py
index 81dc4389a1f0..550d166e460d 100644
--- a/tests/models/donut/test_image_processing_donut.py
+++ b/tests/models/donut/test_image_processing_donut.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import is_flaky, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -81,7 +82,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DonutFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class DonutFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DonutFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/dpt/test_image_processing_dpt.py b/tests/models/dpt/test_image_processing_dpt.py
index e17abce74ae6..0bbeb173e597 100644
--- a/tests/models/dpt/test_image_processing_dpt.py
+++ b/tests/models/dpt/test_image_processing_dpt.py
@@ -21,7 +21,8 @@
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.testing_utils import require_torch, require_vision
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -73,7 +74,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class DPTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class DPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = DPTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/flava/test_image_processing_flava.py b/tests/models/flava/test_image_processing_flava.py
index 939520e780ba..28718748200d 100644
--- a/tests/models/flava/test_image_processing_flava.py
+++ b/tests/models/flava/test_image_processing_flava.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -159,7 +160,7 @@ def get_expected_codebook_image_size(self):
 
 @require_torch
 @require_vision
-class FlavaFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class FlavaFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = FlavaFeatureExtractor if is_vision_available() else None
     maxDiff = None
diff --git a/tests/models/glpn/test_image_processing_glpn.py b/tests/models/glpn/test_image_processing_glpn.py
index bbbd54269fc9..31e527761771 100644
--- a/tests/models/glpn/test_image_processing_glpn.py
+++ b/tests/models/glpn/test_image_processing_glpn.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -66,7 +67,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class GLPNFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class GLPNFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = GLPNFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/imagegpt/test_image_processing_imagegpt.py b/tests/models/imagegpt/test_image_processing_imagegpt.py
index 2ef41cfc5df8..465a6015a39a 100644
--- a/tests/models/imagegpt/test_image_processing_imagegpt.py
+++ b/tests/models/imagegpt/test_image_processing_imagegpt.py
@@ -25,7 +25,7 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
 
 
 if is_torch_available():
@@ -78,7 +78,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ImageGPTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ImageGPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ImageGPTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index 112d40cca383..4423d33376e4 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -63,7 +64,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_pytesseract
-class LayoutLMv2FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class LayoutLMv2FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LayoutLMv2FeatureExtractor if is_pytesseract_available() else None
 
diff --git a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
index 6d9f4fd58b6f..829fc8d79dde 100644
--- a/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
+++ b/tests/models/layoutlmv3/test_image_processing_layoutlmv3.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_pytesseract, require_torch
 from transformers.utils import is_pytesseract_available, is_torch_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -63,7 +64,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_pytesseract
-class LayoutLMv3FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class LayoutLMv3FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LayoutLMv3FeatureExtractor if is_pytesseract_available() else None
 
diff --git a/tests/models/levit/test_image_processing_levit.py b/tests/models/levit/test_image_processing_levit.py
index 912bb9deb545..76f3c66e1ade 100644
--- a/tests/models/levit/test_image_processing_levit.py
+++ b/tests/models/levit/test_image_processing_levit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -80,7 +81,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class LevitFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class LevitFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = LevitFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/maskformer/test_image_processing_maskformer.py b/tests/models/maskformer/test_image_processing_maskformer.py
index ccad43d4185a..f8ddf8c9dc03 100644
--- a/tests/models/maskformer/test_image_processing_maskformer.py
+++ b/tests/models/maskformer/test_image_processing_maskformer.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -130,7 +131,7 @@ def get_fake_maskformer_outputs(self):
 
 @require_torch
 @require_vision
-class MaskFormerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MaskFormerFeatureExtractor if (is_vision_available() and is_torch_available()) else None
 
diff --git a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
index c0b6f2979405..383f91c554f8 100644
--- a/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
+++ b/tests/models/mobilenet_v1/test_image_processing_mobilenet_v1.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -71,7 +72,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileNetV1FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class MobileNetV1FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileNetV1FeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
index aa35b62383da..e207932e38e0 100644
--- a/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
+++ b/tests/models/mobilenet_v2/test_image_processing_mobilenet_v2.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -71,7 +72,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileNetV2FeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class MobileNetV2FeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileNetV2FeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/mobilevit/test_image_processing_mobilevit.py b/tests/models/mobilevit/test_image_processing_mobilevit.py
index ce3d07a3ad23..a22fc2c1d541 100644
--- a/tests/models/mobilevit/test_image_processing_mobilevit.py
+++ b/tests/models/mobilevit/test_image_processing_mobilevit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -74,7 +75,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class MobileViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class MobileViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = MobileViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py
index 8faa441b5184..f34ae080cf96 100644
--- a/tests/models/oneformer/test_image_processing_oneformer.py
+++ b/tests/models/oneformer/test_image_processing_oneformer.py
@@ -23,7 +23,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
 
 
 if is_torch_available():
@@ -155,7 +155,7 @@ def get_fake_oneformer_outputs(self):
 
 @require_torch
 @require_vision
-class OneFormerImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class OneFormerImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
     image_processing_class = OneFormerImageProcessor if (is_vision_available() and is_torch_available()) else None
     # only for test_feat_extracttion_common.test_feat_extract_to_json_string
     feature_extraction_class = image_processing_class
diff --git a/tests/models/owlvit/test_image_processing_owlvit.py b/tests/models/owlvit/test_image_processing_owlvit.py
index 77e3ebc52049..bf2cd8d666d2 100644
--- a/tests/models/owlvit/test_image_processing_owlvit.py
+++ b/tests/models/owlvit/test_image_processing_owlvit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -81,7 +82,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class OwlViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class OwlViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = OwlViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/poolformer/test_image_processing_poolformer.py b/tests/models/poolformer/test_image_processing_poolformer.py
index 7f999c9c13f3..47e583a3211a 100644
--- a/tests/models/poolformer/test_image_processing_poolformer.py
+++ b/tests/models/poolformer/test_image_processing_poolformer.py
@@ -20,7 +20,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -77,7 +78,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class PoolFormerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class PoolFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = PoolFormerFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/segformer/test_image_processing_segformer.py b/tests/models/segformer/test_image_processing_segformer.py
index de0c2d2ac203..a104fc2f4835 100644
--- a/tests/models/segformer/test_image_processing_segformer.py
+++ b/tests/models/segformer/test_image_processing_segformer.py
@@ -22,7 +22,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -95,7 +96,7 @@ def prepare_semantic_batch_inputs():
 
 @require_torch
 @require_vision
-class SegformerFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class SegformerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = SegformerFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/swin2sr/test_image_processing_swin2sr.py b/tests/models/swin2sr/test_image_processing_swin2sr.py
index 488f55714ec2..393a44ecface 100644
--- a/tests/models/swin2sr/test_image_processing_swin2sr.py
+++ b/tests/models/swin2sr/test_image_processing_swin2sr.py
@@ -21,7 +21,7 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
 
 
 if is_torch_available():
@@ -100,7 +100,7 @@ def prepare_inputs(self, equal_resolution=False, numpify=False, torchify=False):
 
 @require_torch
 @require_vision
-class Swin2SRImageProcessingTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class Swin2SRImageProcessingTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = Swin2SRImageProcessor if is_vision_available() else None
 
diff --git a/tests/models/videomae/test_image_processing_videomae.py b/tests/models/videomae/test_image_processing_videomae.py
index 98a60ac39e56..025c39ef97f8 100644
--- a/tests/models/videomae/test_image_processing_videomae.py
+++ b/tests/models/videomae/test_image_processing_videomae.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_video_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_video_inputs
 
 
 if is_torch_available():
@@ -80,7 +81,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class VideoMAEFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class VideoMAEFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = VideoMAEFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/vilt/test_image_processing_vilt.py b/tests/models/vilt/test_image_processing_vilt.py
index 2c75ddee06d9..5d7be90a7475 100644
--- a/tests/models/vilt/test_image_processing_vilt.py
+++ b/tests/models/vilt/test_image_processing_vilt.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -116,7 +117,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class ViltFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ViltFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ViltFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/vit/test_image_processing_vit.py b/tests/models/vit/test_image_processing_vit.py
index f358e2a39e03..a0db60887e40 100644
--- a/tests/models/vit/test_image_processing_vit.py
+++ b/tests/models/vit/test_image_processing_vit.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -73,7 +74,7 @@ def prepare_feat_extract_dict(self):
 
 @require_torch
 @require_vision
-class ViTFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class ViTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = ViTFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/models/yolos/test_image_processing_yolos.py b/tests/models/yolos/test_image_processing_yolos.py
index 2c82ac6ecbe7..4e22baa4d668 100644
--- a/tests/models/yolos/test_image_processing_yolos.py
+++ b/tests/models/yolos/test_image_processing_yolos.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_image_processing_common import ImageProcessingSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
@@ -114,7 +115,7 @@ def get_expected_values(self, image_inputs, batched=False):
 
 @require_torch
 @require_vision
-class YolosFeatureExtractionTest(ImageProcessingSavingTestMixin, unittest.TestCase):
+class YolosFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
 
     feature_extraction_class = YolosFeatureExtractor if is_vision_available() else None
 
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
index 5c60cf58ac25..98f143506bc8 100644
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -25,7 +25,16 @@
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
+from transformers.testing_utils import (
+    TOKEN,
+    USER,
+    check_json_file_has_correct_format,
+    get_tests_dir,
+    is_staging_test,
+    require_torch,
+    require_vision,
+)
+from transformers.utils import is_torch_available, is_vision_available
 
 
 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -33,6 +42,14 @@
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
 
 
+if is_torch_available():
+    import numpy as np
+    import torch
+
+if is_vision_available():
+    from PIL import Image
+
+
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 
diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py
index 6c2d28a98263..d388c11361e7 100644
--- a/utils/tests_fetcher.py
+++ b/utils/tests_fetcher.py
@@ -353,8 +353,8 @@ def create_reverse_dependency_map():
     "feature_extraction_sequence_utils.py": "test_sequence_feature_extraction_common.py",
     "feature_extraction_utils.py": "test_feature_extraction_common.py",
     "file_utils.py": ["utils/test_file_utils.py", "utils/test_model_output.py"],
-    "image_transforms.py": "test_image_transforms.py",
     "image_processing_utils.py": ["test_image_processing_common.py", "utils/test_image_processing_utils.py"],
+    "image_transforms.py": "test_image_transforms.py",
     "utils/generic.py": ["utils/test_file_utils.py", "utils/test_model_output.py", "utils/test_generic.py"],
     "utils/hub.py": "utils/test_hub_utils.py",
     "modelcard.py": "utils/test_model_card.py",

From 743ae6813c2dff94ff20c98110e1181feb95171b Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Fri, 20 Jan 2023 14:43:12 +0000
Subject: [PATCH 5/8] Fix imports

---
 .../oneformer/test_processor_oneformer.py     |  2 +-
 tests/test_feature_extraction_common.py       | 19 +------------------
 2 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/tests/models/oneformer/test_processor_oneformer.py b/tests/models/oneformer/test_processor_oneformer.py
index 72d940df8b18..5056d682832d 100644
--- a/tests/models/oneformer/test_processor_oneformer.py
+++ b/tests/models/oneformer/test_processor_oneformer.py
@@ -26,7 +26,7 @@
 from transformers.testing_utils import check_json_file_has_correct_format, require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import prepare_image_inputs
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/test_feature_extraction_common.py b/tests/test_feature_extraction_common.py
index 98f143506bc8..5c60cf58ac25 100644
--- a/tests/test_feature_extraction_common.py
+++ b/tests/test_feature_extraction_common.py
@@ -25,16 +25,7 @@
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from requests.exceptions import HTTPError
 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
-from transformers.testing_utils import (
-    TOKEN,
-    USER,
-    check_json_file_has_correct_format,
-    get_tests_dir,
-    is_staging_test,
-    require_torch,
-    require_vision,
-)
-from transformers.utils import is_torch_available, is_vision_available
+from transformers.testing_utils import TOKEN, USER, check_json_file_has_correct_format, get_tests_dir, is_staging_test
 
 
 sys.path.append(str(Path(__file__).parent.parent / "utils"))
@@ -42,14 +33,6 @@
 from test_module.custom_feature_extraction import CustomFeatureExtractor  # noqa E402
 
 
-if is_torch_available():
-    import numpy as np
-    import torch
-
-if is_vision_available():
-    from PIL import Image
-
-
 SAMPLE_FEATURE_EXTRACTION_CONFIG_DIR = get_tests_dir("fixtures")
 
 

From ac87fe9baa572fbae47d13d5499a32c95c6fd9d1 Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Fri, 20 Jan 2023 15:34:22 +0000
Subject: [PATCH 6/8] Fix all imports

---
 .../efficientformer/test_image_processing_efficientformer.py   | 3 ++-
 tests/models/oneformer/test_image_processing_oneformer.py      | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/models/efficientformer/test_image_processing_efficientformer.py b/tests/models/efficientformer/test_image_processing_efficientformer.py
index c672b15be18a..0a5255056461 100644
--- a/tests/models/efficientformer/test_image_processing_efficientformer.py
+++ b/tests/models/efficientformer/test_image_processing_efficientformer.py
@@ -21,7 +21,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():
diff --git a/tests/models/oneformer/test_image_processing_oneformer.py b/tests/models/oneformer/test_image_processing_oneformer.py
index f34ae080cf96..79c6d82c3f42 100644
--- a/tests/models/oneformer/test_image_processing_oneformer.py
+++ b/tests/models/oneformer/test_image_processing_oneformer.py
@@ -23,7 +23,8 @@
 from transformers.testing_utils import require_torch, require_vision
 from transformers.utils import is_torch_available, is_vision_available
 
-from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin, prepare_image_inputs
+from ...test_feature_extraction_common import FeatureExtractionSavingTestMixin
+from ...test_image_processing_common import prepare_image_inputs
 
 
 if is_torch_available():

From 26885de9246eae06a19f4e316b5546cfbd1b437a Mon Sep 17 00:00:00 2001
From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com>
Date: Fri, 20 Jan 2023 16:48:13 +0000
Subject: [PATCH 7/8] Formatting fix

---
 tests/test_image_processing_common.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
index 35fbe83e695f..183267732551 100644
--- a/tests/test_image_processing_common.py
+++ b/tests/test_image_processing_common.py
@@ -74,15 +74,7 @@ def prepare_image_inputs(image_processor_tester, equal_resolution=False, numpify
                 min_resolution = max(image_processor_tester.size_divisor, min_resolution)
             width, height = np.random.choice(np.arange(min_resolution, image_processor_tester.max_resolution), 2)
         image_inputs.append(
-            np.random.randint(
-                255,
-                size=(
-                    image_processor_tester.num_channels,
-                    width,
-                    height,
-                ),
-                dtype=np.uint8,
-            )
+            np.random.randint(255, size=(image_processor_tester.num_channels, width, height), dtype=np.uint8)
         )
 
     if not numpify and not torchify:

From 997cbebf3483d500de8f85cc8834b704f6b410be Mon Sep 17 00:00:00 2001
From: amyeroberts <22614925+amyeroberts@users.noreply.github.com>
Date: Fri, 20 Jan 2023 17:02:15 +0000
Subject: [PATCH 8/8] Update tests/test_image_processing_common.py

---
 tests/test_image_processing_common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
index 183267732551..d8485e3853d8 100644
--- a/tests/test_image_processing_common.py
+++ b/tests/test_image_processing_common.py
@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2021 HuggingFace Inc.
+# Copyright 2023 HuggingFace Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.