From ca8028b6465df1ec042117ba569a3dd0365d940c Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 14:31:14 +0100 Subject: [PATCH 1/7] Add mapping --- docs/source/en/model_doc/auto.mdx | 4 +++ src/transformers/__init__.py | 4 +++ src/transformers/models/auto/__init__.py | 4 +++ src/transformers/models/auto/modeling_auto.py | 20 ++++++++++++ .../models/maskformer/__init__.py | 4 +-- .../models/maskformer/modeling_maskformer.py | 31 +++++++++++++------ src/transformers/utils/dummy_pt_objects.py | 10 ++++++ 7 files changed, 65 insertions(+), 12 deletions(-) diff --git a/docs/source/en/model_doc/auto.mdx b/docs/source/en/model_doc/auto.mdx index 7957f453a2fb..b39920151db4 100644 --- a/docs/source/en/model_doc/auto.mdx +++ b/docs/source/en/model_doc/auto.mdx @@ -254,6 +254,10 @@ The following auto classes are available for the following computer vision tasks [[autodoc]] AutoModelForInstanceSegmentation +### AutoModelForUniversalSegmentation + +[[autodoc]] AutoModelForUniversalSegmentation + ### AutoModelForZeroShotObjectDetection [[autodoc]] AutoModelForZeroShotObjectDetection diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index e6758c7df38f..085528a51c55 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -943,6 +943,7 @@ "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", + "MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", @@ -974,6 +975,7 @@ "AutoModelForSpeechSeq2Seq", "AutoModelForTableQuestionAnswering", "AutoModelForTokenClassification", + "AutoModelForUniversalSegmentation", "AutoModelForVideoClassification", "AutoModelForVision2Seq", "AutoModelForVisualQuestionAnswering", @@ -4112,6 +4114,7 @@ MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, @@ -4143,6 +4146,7 @@ AutoModelForSpeechSeq2Seq, AutoModelForTableQuestionAnswering, AutoModelForTokenClassification, + AutoModelForUniversalSegmentation, AutoModelForVideoClassification, AutoModelForVision2Seq, AutoModelForVisualQuestionAnswering, diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py index a6ee30366b39..da8ceb8e7e62 100644 --- a/src/transformers/models/auto/__init__.py +++ b/src/transformers/models/auto/__init__.py @@ -67,6 +67,7 @@ "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", + "MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", @@ -97,6 +98,7 @@ "AutoModelForSpeechSeq2Seq", "AutoModelForTableQuestionAnswering", "AutoModelForTokenClassification", + "AutoModelForUniversalSegmentation", "AutoModelForVideoClassification", "AutoModelForVision2Seq", "AutoModelForVisualQuestionAnswering", @@ -222,6 +224,7 @@ MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, @@ -253,6 +256,7 @@ 
AutoModelForSpeechSeq2Seq, AutoModelForTableQuestionAnswering, AutoModelForTokenClassification, + AutoModelForUniversalSegmentation, AutoModelForVideoClassification, AutoModelForVision2Seq, AutoModelForVisualQuestionAnswering, diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index c8dcc9aed1e7..ac4ff3e7df63 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -434,10 +434,18 @@ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict( [ # Model for Instance Segmentation mapping + # MaskFormerForInstanceSegmentation can be removed from this mapping in v5 ("maskformer", "MaskFormerForInstanceSegmentation"), ] ) +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict( + [ + # Model for Universal Segmentation mapping + ("maskformer", "MaskFormerForUniversalSegmentation"), + ] +) + MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( [ ("timesformer", "TimesformerForVideoClassification"), @@ -891,6 +899,9 @@ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES ) +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = _LazyAutoMapping( + CONFIG_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES +) MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES ) @@ -1082,6 +1093,15 @@ class AutoModelForSemanticSegmentation(_BaseAutoModelClass): ) +class AutoModelForUniversalSegmentation(_BaseAutoModelClass): + _model_mapping = MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING + + +AutoModelForUniversalSegmentation = auto_class_update( + AutoModelForUniversalSegmentation, head_doc="universal image segmentation" +) + + class AutoModelForInstanceSegmentation(_BaseAutoModelClass): _model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index ba6452c7c405..9c07f86e8219 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -43,7 +43,7 @@ else: _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", - "MaskFormerForInstanceSegmentation", + "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", ] @@ -73,7 +73,7 @@ else: from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, - MaskFormerForInstanceSegmentation, + MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, ) diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 298d10879a2f..8846439034ba 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -16,6 +16,7 @@ import math import random +import warnings from dataclasses import dataclass from numbers import Number from typing import Dict, List, Optional, Tuple @@ -188,9 +189,9 @@ class MaskFormerModelOutput(ModelOutput): @dataclass -class MaskFormerForInstanceSegmentationOutput(ModelOutput): +class MaskFormerForUniversalSegmentationOutput(ModelOutput): """ - Class for outputs of [`MaskFormerForInstanceSegmentation`]. + Class for outputs of [`MaskFormerForUniversalSegmentation`]. 
This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_instance_segmentation`] or @@ -1633,7 +1634,7 @@ def forward( return output -class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): +class MaskFormerForUniversalSegmentation(MaskFormerPreTrainedModel): def __init__(self, config: MaskFormerConfig): super().__init__(config) self.model = MaskFormerModel(config) @@ -1715,7 +1716,7 @@ def get_logits(self, outputs: MaskFormerModelOutput) -> Tuple[Tensor, Tensor, Di return class_queries_logits, masks_queries_logits, auxiliary_logits @add_start_docstrings_to_model_forward(MASKFORMER_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=MaskFormerForInstanceSegmentationOutput, config_class=_CONFIG_FOR_DOC) + @replace_return_docstrings(output_type=MaskFormerForUniversalSegmentationOutput, config_class=_CONFIG_FOR_DOC) def forward( self, pixel_values: Tensor, @@ -1726,7 +1727,7 @@ def forward( output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> MaskFormerForInstanceSegmentationOutput: + ) -> MaskFormerForUniversalSegmentationOutput: r""" mask_labels (`List[torch.Tensor]`, *optional*): List of mask labels of shape `(num_labels, height, width)` to be fed to a model @@ -1741,13 +1742,13 @@ def forward( Semantic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade") - >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") + >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") >>> url = ( ...
"https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" @@ -1774,13 +1775,13 @@ def forward( Panoptic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on COCO panoptic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco") - >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") + >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1832,7 +1833,7 @@ def forward( if not output_auxiliary_logits: auxiliary_logits = None - output = MaskFormerForInstanceSegmentationOutput( + output = MaskFormerForUniversalSegmentationOutput( loss=loss, **outputs, class_queries_logits=class_queries_logits, @@ -1845,3 +1846,13 @@ def forward( if loss is not None: output = ((loss)) + output return output + + +class MaskFormerForInstanceSegmentation(MaskFormerForUniversalSegmentation): + def __init__(self, *args, **kwargs) -> None: + warnings.warn( + "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" + " Transformers. Please use MaskFormerForUniversalSegmentation instead.", + FutureWarning, + ) + super().__init__(*args, **kwargs) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index e8fcfa496932..1176f94cec33 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -446,6 +446,9 @@ def __init__(self, *args, **kwargs): MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = None +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = None + + MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = None @@ -639,6 +642,13 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class AutoModelForUniversalSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class AutoModelForVideoClassification(metaclass=DummyObject): _backends = ["torch"] From 6903b8cf8bac39da72ad4d626e05d07a06353dc5 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 14:37:16 +0100 Subject: [PATCH 2/7] Add mapping to pipeline --- src/transformers/pipelines/image_segmentation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index 55b2217ccde2..9fdb0dc3314d 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -16,6 +16,7 @@ MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, ) @@ -75,6 +76,7 @@ def __init__(self, *args, **kwargs): MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items() + MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() + MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() + + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING.items() ) ) From 0573266077010f1ac3e4ad7351e566ef997ffe69 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 19:31:09 +0100 Subject: [PATCH 3/7] Apply 
suggestions --- docs/source/en/model_doc/maskformer.mdx | 8 +++--- src/transformers/__init__.py | 2 ++ .../models/maskformer/__init__.py | 2 ++ .../maskformer/configuration_maskformer.py | 2 +- .../maskformer/image_processing_maskformer.py | 26 +++++++++---------- src/transformers/utils/dummy_pt_objects.py | 7 +++++ .../maskformer/test_modeling_maskformer.py | 18 ++++++------- utils/check_repo.py | 2 ++ 8 files changed, 40 insertions(+), 27 deletions(-) diff --git a/docs/source/en/model_doc/maskformer.mdx b/docs/source/en/model_doc/maskformer.mdx index 4060cbab9a8f..1f095cdf3463 100644 --- a/docs/source/en/model_doc/maskformer.mdx +++ b/docs/source/en/model_doc/maskformer.mdx @@ -33,7 +33,7 @@ Tips: `get_num_masks` function inside in the `MaskFormerLoss` class of `modeling_maskformer.py`. When training on multiple nodes, this should be set to the average number of target masks across all nodes, as can be seen in the original implementation [here](https://github.com/facebookresearch/MaskFormer/blob/da3e60d85fdeedcb31476b5edd7d328826ce56cc/mask_former/modeling/criterion.py#L169). - One can use [`MaskFormerImageProcessor`] to prepare images for the model and optional targets for the model. -- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForInstanceSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. +- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForUniversalSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. The figure below illustrates the architecture of MaskFormer. Taken from the [original paper](https://arxiv.org/abs/2107.06278). @@ -51,7 +51,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). The [[autodoc]] models.maskformer.modeling_maskformer.MaskFormerModelOutput -[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput +[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForUniversalSegmentationOutput ## MaskFormerConfig @@ -80,7 +80,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). 
The [[autodoc]] MaskFormerModel - forward -## MaskFormerForInstanceSegmentation +## MaskFormerForUniversalSegmentation -[[autodoc]] MaskFormerForInstanceSegmentation +[[autodoc]] MaskFormerForUniversalSegmentation - forward diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 085528a51c55..019f709094fd 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1661,6 +1661,7 @@ [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", + "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", "MaskFormerSwinBackbone", @@ -4704,6 +4705,7 @@ from .models.maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, + MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, MaskFormerSwinBackbone, diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index 9c07f86e8219..c6950ddaeef7 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -43,6 +43,7 @@ else: _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", + "MaskFormerForInstanceSegmentation", "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", @@ -73,6 +74,7 @@ else: from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, + MaskFormerForInstanceSegmentation, MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 655bee2b9a5f..d871fc2ab88d 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -53,7 +53,7 @@ class MaskFormerConfig(PretrainedConfig): no_object_weight (`float`, *optional*, defaults to 0.1): Weight to apply to the null (no object) class. use_auxiliary_loss(`bool`, *optional*, defaults to `False`): - If `True` [`MaskFormerForInstanceSegmentationOutput`] will contain the auxiliary losses computed using the + If `True` [`MaskFormerForUniversalSegmentationOutput`] will contain the auxiliary losses computed using the logits from each decoder's stage. backbone_config (`Dict`, *optional*): The configuration passed to the backbone, if unset, the configuration corresponding to diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 50cef6070028..b87854a25f54 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: - from transformers import MaskFormerForInstanceSegmentationOutput + from transformers import MaskFormerForUniversalSegmentationOutput if is_torch_available(): @@ -872,15 +872,15 @@ def encode_inputs( return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForUniversalSegmentationOutput", target_size: Tuple[int, int] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image segmentation predictions. 
Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentationOutput`]): - The outputs from [`MaskFormerForInstanceSegmentation`]. + outputs ([`MaskFormerForUniversalSegmentationOutput`]): + The outputs from [`MaskFormerForUniversalSegmentation`]. target_size (`Tuple[int, int]`, *optional*): If set, the `masks_queries_logits` will be resized to `target_size`. @@ -923,11 +923,11 @@ def post_process_semantic_segmentation( self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForInstanceSegmentation`] into semantic segmentation maps. Only supports + Converts the output of [`MaskFormerForUniversalSegmentation`] into semantic segmentation maps. Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentation`]): + outputs ([`MaskFormerForUniversalSegmentation`]): Raw outputs of the model. target_sizes (`List[Tuple[int, int]]`, *optional*): List of length (batch_size), where each list item (`Tuple[int, int]]`) corresponds to the requested @@ -979,11 +979,11 @@ def post_process_instance_segmentation( return_coco_annotation: Optional[bool] = False, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only - supports PyTorch. + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into instance segmentation predictions. + Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentation`]): + outputs ([`MaskFormerForUniversalSegmentation`]): Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. @@ -1062,12 +1062,12 @@ def post_process_panoptic_segmentation( target_sizes: Optional[List[Tuple[int, int]]] = None, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image panoptic segmentation + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image panoptic segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentationOutput`]): - The outputs from [`MaskFormerForInstanceSegmentation`]. + outputs ([`MaskFormerForUniversalSegmentationOutput`]): + The outputs from [`MaskFormerForUniversalSegmentation`]. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. 
mask_threshold (`float`, *optional*, defaults to 0.5): diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 1176f94cec33..edcef5050b8e 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -3450,6 +3450,13 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class MaskFormerForUniversalSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MaskFormerModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index 52c811591bba..4e834d1fdbee 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -31,7 +31,7 @@ if is_torch_available(): import torch - from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel + from transformers import MaskFormerForUniversalSegmentation, MaskFormerModel if is_vision_available(): from transformers import MaskFormerFeatureExtractor @@ -135,7 +135,7 @@ def create_and_check_maskformer_model(self, config, pixel_values, pixel_mask, ou def create_and_check_maskformer_instance_segmentation_head_model( self, config, pixel_values, pixel_mask, mask_labels, class_labels ): - model = MaskFormerForInstanceSegmentation(config=config) + model = MaskFormerForUniversalSegmentation(config=config) model.to(torch_device) model.eval() @@ -174,7 +174,7 @@ def comm_check_on_output(result): @require_torch class MaskFormerModelTest(ModelTesterMixin, unittest.TestCase): - all_model_classes = (MaskFormerModel, MaskFormerForInstanceSegmentation) if is_torch_available() else () + all_model_classes = (MaskFormerModel, MaskFormerForUniversalSegmentation) if is_torch_available() else () is_encoder_decoder = False test_pruning = False @@ -245,7 +245,7 @@ def test_model_with_labels(self): "class_labels": torch.zeros(2, 10, device=torch_device).long(), } - model = MaskFormerForInstanceSegmentation(MaskFormerConfig()).to(torch_device) + model = MaskFormerForUniversalSegmentation(MaskFormerConfig()).to(torch_device) outputs = model(**inputs) self.assertTrue(outputs.loss is not None) @@ -264,7 +264,7 @@ def test_attention_outputs(self): def test_training(self): if not self.model_tester.is_training: return - # only MaskFormerForInstanceSegmentation has the loss + # only MaskFormerForUniversalSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() @@ -276,7 +276,7 @@ def test_training(self): loss.backward() def test_retain_grad_hidden_states_attentions(self): - # only MaskFormerForInstanceSegmentation has the loss + # only MaskFormerForUniversalSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() config.output_hidden_states = True @@ -371,7 +371,7 @@ def test_inference_no_head(self): def test_inference_instance_segmentation_head(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) @@ -415,7 +415,7 @@ def test_inference_instance_segmentation_head(self): def 
test_inference_instance_segmentation_head_resnet_backbone(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") .to(torch_device) .eval() ) @@ -451,7 +451,7 @@ def test_inference_instance_segmentation_head_resnet_backbone(self): def test_with_segmentation_maps_and_loss(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) diff --git a/utils/check_repo.py b/utils/check_repo.py index c72c089d7906..07a67222a5b1 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -49,6 +49,7 @@ # Being in this list is an exception and should **not** be the rule. IGNORE_NON_TESTED = PRIVATE_MODELS.copy() + [ # models to ignore for not tested + "MaskFormerForInstanceSegmentation", # This class name is deprecated, MaskFormerForUniversalSegmentation is tested "CLIPSegDecoder", # Building part of bigger (tested) model. "TableTransformerEncoder", # Building part of bigger (tested) model. "TableTransformerDecoder", # Building part of bigger (tested) model. @@ -614,6 +615,7 @@ def find_all_documented_objects(): "LineByLineTextDataset", "LineByLineWithRefDataset", "LineByLineWithSOPTextDataset", + "MaskFormerForInstanceSegmentation", "PretrainedBartModel", "PretrainedFSMTModel", "SingleSentenceClassificationProcessor", From c03702e9ab662dd23a7c33ed8ed1d7dfa52437bd Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 20:11:25 +0100 Subject: [PATCH 4/7] Fix feature extractor tests --- tests/models/maskformer/test_feature_extraction_maskformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index ca2f504c06c8..9285a81d50a7 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -32,7 +32,7 @@ if is_vision_available(): from transformers import MaskFormerFeatureExtractor from transformers.models.maskformer.image_processing_maskformer import binary_mask_to_rle - from transformers.models.maskformer.modeling_maskformer import MaskFormerForInstanceSegmentationOutput + from transformers.models.maskformer.modeling_maskformer import MaskFormerForUniversalSegmentationOutput if is_vision_available(): from PIL import Image @@ -121,7 +121,7 @@ def get_expected_values(self, image_inputs, batched=False): return expected_height, expected_width def get_fake_maskformer_outputs(self): - return MaskFormerForInstanceSegmentationOutput( + return MaskFormerForUniversalSegmentationOutput( # +1 for null class class_queries_logits=torch.randn((self.batch_size, self.num_queries, self.num_classes + 1)), masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), From 99a86905d8f4b33e4585a3a857801343f9589f4d Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 09:33:02 +0100 Subject: [PATCH 5/7] Use ForInstance, add model to universal mapping --- docs/source/en/model_doc/maskformer.mdx | 8 +++--- src/transformers/__init__.py | 2 -- src/transformers/models/auto/modeling_auto.py | 3 ++- .../models/maskformer/__init__.py | 2 -- .../maskformer/configuration_maskformer.py | 2 +- .../maskformer/image_processing_maskformer.py 
| 26 +++++++++---------- .../models/maskformer/modeling_maskformer.py | 24 ++++++++--------- src/transformers/utils/dummy_pt_objects.py | 7 ----- .../test_feature_extraction_maskformer.py | 4 +-- .../maskformer/test_modeling_maskformer.py | 18 ++++++------- 10 files changed, 43 insertions(+), 53 deletions(-) diff --git a/docs/source/en/model_doc/maskformer.mdx b/docs/source/en/model_doc/maskformer.mdx index 1f095cdf3463..4060cbab9a8f 100644 --- a/docs/source/en/model_doc/maskformer.mdx +++ b/docs/source/en/model_doc/maskformer.mdx @@ -33,7 +33,7 @@ Tips: `get_num_masks` function inside in the `MaskFormerLoss` class of `modeling_maskformer.py`. When training on multiple nodes, this should be set to the average number of target masks across all nodes, as can be seen in the original implementation [here](https://github.com/facebookresearch/MaskFormer/blob/da3e60d85fdeedcb31476b5edd7d328826ce56cc/mask_former/modeling/criterion.py#L169). - One can use [`MaskFormerImageProcessor`] to prepare images for the model and optional targets for the model. -- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForUniversalSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. +- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForInstanceSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. The figure below illustrates the architecture of MaskFormer. Taken from the [original paper](https://arxiv.org/abs/2107.06278). @@ -51,7 +51,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). The [[autodoc]] models.maskformer.modeling_maskformer.MaskFormerModelOutput -[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForUniversalSegmentationOutput +[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput ## MaskFormerConfig @@ -80,7 +80,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). 
The [[autodoc]] MaskFormerModel - forward -## MaskFormerForUniversalSegmentation +## MaskFormerForInstanceSegmentation -[[autodoc]] MaskFormerForUniversalSegmentation +[[autodoc]] MaskFormerForInstanceSegmentation - forward diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 019f709094fd..085528a51c55 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1661,7 +1661,6 @@ [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", - "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", "MaskFormerSwinBackbone", @@ -4705,7 +4704,6 @@ from .models.maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, - MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, MaskFormerSwinBackbone, diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index ac4ff3e7df63..ec25db48e233 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -442,7 +442,8 @@ MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict( [ # Model for Universal Segmentation mapping - ("maskformer", "MaskFormerForUniversalSegmentation"), + ("detr", "DetrForSegmentation"), + ("maskformer", "MaskFormerForInstanceSegmentation"), ] ) diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index c6950ddaeef7..ba6452c7c405 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -44,7 +44,6 @@ _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", - "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", ] @@ -75,7 +74,6 @@ from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, - MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, ) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index d871fc2ab88d..655bee2b9a5f 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -53,7 +53,7 @@ class MaskFormerConfig(PretrainedConfig): no_object_weight (`float`, *optional*, defaults to 0.1): Weight to apply to the null (no object) class. use_auxiliary_loss(`bool`, *optional*, defaults to `False`): - If `True` [`MaskFormerForUniversalSegmentationOutput`] will contain the auxiliary losses computed using the + If `True` [`MaskFormerForInstanceSegmentationOutput`] will contain the auxiliary losses computed using the logits from each decoder's stage. 
backbone_config (`Dict`, *optional*): The configuration passed to the backbone, if unset, the configuration corresponding to diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index b87854a25f54..50cef6070028 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: - from transformers import MaskFormerForUniversalSegmentationOutput + from transformers import MaskFormerForInstanceSegmentationOutput if is_torch_available(): @@ -872,15 +872,15 @@ def encode_inputs( return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForUniversalSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image segmentation predictions. Only + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentationOutput`]): - The outputs from [`MaskFormerForUniversalSegmentation`]. + outputs ([`MaskFormerForInstanceSegmentationOutput`]): + The outputs from [`MaskFormerForInstanceSegmentation`]. target_size (`Tuple[int, int]`, *optional*): If set, the `masks_queries_logits` will be resized to `target_size`. @@ -923,11 +923,11 @@ def post_process_semantic_segmentation( self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForUniversalSegmentation`] into semantic segmentation maps. Only supports + Converts the output of [`MaskFormerForInstanceSegmentation`] into semantic segmentation maps. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentation`]): + outputs ([`MaskFormerForInstanceSegmentation`]): Raw outputs of the model. target_sizes (`List[Tuple[int, int]]`, *optional*): List of length (batch_size), where each list item (`Tuple[int, int]]`) corresponds to the requested @@ -979,11 +979,11 @@ def post_process_instance_segmentation( return_coco_annotation: Optional[bool] = False, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into instance segmentation predictions. - Only supports PyTorch. + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only + supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentation`]): + outputs ([`MaskFormerForInstanceSegmentation`]): Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. @@ -1062,12 +1062,12 @@ def post_process_panoptic_segmentation( target_sizes: Optional[List[Tuple[int, int]]] = None, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image panoptic segmentation + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image panoptic segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentationOutput`]): - The outputs from [`MaskFormerForUniversalSegmentation`]. + outputs ([`MaskFormerForInstanceSegmentationOutput`]): + The outputs from [`MaskFormerForInstanceSegmentation`]. 
threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. mask_threshold (`float`, *optional*, defaults to 0.5): diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 8846439034ba..61ab637bb8f3 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -189,9 +189,9 @@ class MaskFormerModelOutput(ModelOutput): @dataclass -class MaskFormerForUniversalSegmentationOutput(ModelOutput): +class MaskFormerForInstanceSegmentationOutput(ModelOutput): """ - Class for outputs of [`MaskFormerForUniversalSegmentation`]. + Class for outputs of [`MaskFormerForInstanceSegmentation`]. This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_instance_segmentation`] or @@ -1634,7 +1634,7 @@ def forward( return output -class MaskFormerForUniversalSegmentation(MaskFormerPreTrainedModel): +class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): def __init__(self, config: MaskFormerConfig): super().__init__(config) self.model = MaskFormerModel(config) @@ -1716,7 +1716,7 @@ def get_logits(self, outputs: MaskFormerModelOutput) -> Tuple[Tensor, Tensor, Di return class_queries_logits, masks_queries_logits, auxiliary_logits @add_start_docstrings_to_model_forward(MASKFORMER_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=MaskFormerForUniversalSegmentationOutput, config_class=_CONFIG_FOR_DOC) + @replace_return_docstrings(output_type=MaskFormerForInstanceSegmentationOutput, config_class=_CONFIG_FOR_DOC) def forward( self, pixel_values: Tensor, @@ -1727,7 +1727,7 @@ def forward( output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> MaskFormerForUniversalSegmentationOutput: + ) -> MaskFormerForInstanceSegmentationOutput: r""" mask_labels (`List[torch.Tensor]`, *optional*): List of mask labels of shape `(num_labels, height, width)` to be fed to a model @@ -1742,13 +1742,13 @@ def forward( Semantic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade") - >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") + >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") >>> url = ( ...
"https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" @@ -1775,13 +1775,13 @@ def forward( Panoptic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on COCO panoptic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco") - >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") + >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1833,7 +1833,7 @@ def forward( if not output_auxiliary_logits: auxiliary_logits = None - output = MaskFormerForUniversalSegmentationOutput( + output = MaskFormerForInstanceSegmentationOutput( loss=loss, **outputs, class_queries_logits=class_queries_logits, @@ -1848,11 +1848,11 @@ def forward( return output -class MaskFormerForInstanceSegmentation(MaskFormerForUniversalSegmentation): +class MaskFormerForInstanceSegmentation(MaskFormerForInstanceSegmentation): def __init__(self, *args, **kwargs) -> None: warnings.warn( "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" - " Transformers. Please use MaskFormerForUniversalSegmentation instead.", + " Transformers. Please use MaskFormerForInstanceSegmentation instead.", FutureWarning, ) super().__init__(*args, **kwargs) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index edcef5050b8e..1176f94cec33 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -3450,13 +3450,6 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class MaskFormerForUniversalSegmentation(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class MaskFormerModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index 9285a81d50a7..ca2f504c06c8 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -32,7 +32,7 @@ if is_vision_available(): from transformers import MaskFormerFeatureExtractor from transformers.models.maskformer.image_processing_maskformer import binary_mask_to_rle - from transformers.models.maskformer.modeling_maskformer import MaskFormerForUniversalSegmentationOutput + from transformers.models.maskformer.modeling_maskformer import MaskFormerForInstanceSegmentationOutput if is_vision_available(): from PIL import Image @@ -121,7 +121,7 @@ def get_expected_values(self, image_inputs, batched=False): return expected_height, expected_width def get_fake_maskformer_outputs(self): - return MaskFormerForUniversalSegmentationOutput( + return MaskFormerForInstanceSegmentationOutput( # +1 for null class class_queries_logits=torch.randn((self.batch_size, self.num_queries, self.num_classes + 1)), masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), diff --git 
a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index 4e834d1fdbee..52c811591bba 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -31,7 +31,7 @@ if is_torch_available(): import torch - from transformers import MaskFormerForUniversalSegmentation, MaskFormerModel + from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel if is_vision_available(): from transformers import MaskFormerFeatureExtractor @@ -135,7 +135,7 @@ def create_and_check_maskformer_model(self, config, pixel_values, pixel_mask, ou def create_and_check_maskformer_instance_segmentation_head_model( self, config, pixel_values, pixel_mask, mask_labels, class_labels ): - model = MaskFormerForUniversalSegmentation(config=config) + model = MaskFormerForInstanceSegmentation(config=config) model.to(torch_device) model.eval() @@ -174,7 +174,7 @@ def comm_check_on_output(result): @require_torch class MaskFormerModelTest(ModelTesterMixin, unittest.TestCase): - all_model_classes = (MaskFormerModel, MaskFormerForUniversalSegmentation) if is_torch_available() else () + all_model_classes = (MaskFormerModel, MaskFormerForInstanceSegmentation) if is_torch_available() else () is_encoder_decoder = False test_pruning = False @@ -245,7 +245,7 @@ def test_model_with_labels(self): "class_labels": torch.zeros(2, 10, device=torch_device).long(), } - model = MaskFormerForUniversalSegmentation(MaskFormerConfig()).to(torch_device) + model = MaskFormerForInstanceSegmentation(MaskFormerConfig()).to(torch_device) outputs = model(**inputs) self.assertTrue(outputs.loss is not None) @@ -264,7 +264,7 @@ def test_attention_outputs(self): def test_training(self): if not self.model_tester.is_training: return - # only MaskFormerForUniversalSegmentation has the loss + # only MaskFormerForInstanceSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() @@ -276,7 +276,7 @@ def test_training(self): loss.backward() def test_retain_grad_hidden_states_attentions(self): - # only MaskFormerForUniversalSegmentation has the loss + # only MaskFormerForInstanceSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() config.output_hidden_states = True @@ -371,7 +371,7 @@ def test_inference_no_head(self): def test_inference_instance_segmentation_head(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) @@ -415,7 +415,7 @@ def test_inference_instance_segmentation_head(self): def test_inference_instance_segmentation_head_resnet_backbone(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") .to(torch_device) .eval() ) @@ -451,7 +451,7 @@ def test_inference_instance_segmentation_head_resnet_backbone(self): def test_with_segmentation_maps_and_loss(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) From 
8706d43b806ec3271e251681b4ddfd170730b197 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 09:36:13 +0100 Subject: [PATCH 6/7] More fixes --- .../models/maskformer/modeling_maskformer.py | 11 ----------- utils/check_repo.py | 2 -- 2 files changed, 13 deletions(-) diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 61ab637bb8f3..298d10879a2f 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -16,7 +16,6 @@ import math import random -import warnings from dataclasses import dataclass from numbers import Number from typing import Dict, List, Optional, Tuple @@ -1846,13 +1845,3 @@ def forward( if loss is not None: output = ((loss)) + output return output - - -class MaskFormerForInstanceSegmentation(MaskFormerForInstanceSegmentation): - def __init__(self, *args, **kwargs) -> None: - warnings.warn( - "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" - " Transformers. Please use MaskFormerForInstanceSegmentation instead.", - FutureWarning, - ) - super().__init__(*args, **kwargs) diff --git a/utils/check_repo.py b/utils/check_repo.py index 07a67222a5b1..da16270acfeb 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -49,7 +49,6 @@ # Being in this list is an exception and should **not** be the rule. IGNORE_NON_TESTED = PRIVATE_MODELS.copy() + [ # models to ignore for not tested - "MaskFormerForInstanceSegmentation", # This class name is deprecated, MaskFormerForUniversalSegmentation is tested "CLIPSegDecoder", # Building part of bigger (tested) model. "TableTransformerEncoder", # Building part of bigger (tested) model. "TableTransformerDecoder", # Building part of bigger (tested) model. @@ -240,7 +239,6 @@ "VisualBertForMultipleChoice", "TFWav2Vec2ForCTC", "TFHubertForCTC", - "MaskFormerForInstanceSegmentation", "XCLIPVisionModel", "XCLIPTextModel", ] From ab69b1f0e4df2a703f04501555f4a9fac5c44018 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 12:49:39 +0100 Subject: [PATCH 7/7] Remove model from deprecated objects --- utils/check_repo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/check_repo.py b/utils/check_repo.py index da16270acfeb..1c2fd4b45c41 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -613,7 +613,6 @@ def find_all_documented_objects(): "LineByLineTextDataset", "LineByLineWithRefDataset", "LineByLineWithSOPTextDataset", - "MaskFormerForInstanceSegmentation", "PretrainedBartModel", "PretrainedFSMTModel", "SingleSentenceClassificationProcessor",
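
Taken together, the series exposes universal segmentation checkpoints through a dedicated auto class while keeping the public MaskFormer class name unchanged. The sketch below is illustrative only and not part of the patches: it assumes a Transformers build that includes these commits, and it reuses the `facebook/maskformer-swin-base-coco` checkpoint and the `MaskFormerImageProcessor` shown in the docstring examples above.

```python
# Minimal sketch, assuming a build containing PATCH 1/7 and PATCH 5/7.
import requests
import torch
from PIL import Image

from transformers import AutoModelForUniversalSegmentation, MaskFormerImageProcessor

image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco")
# The auto class resolves the config through MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING;
# after PATCH 5/7 the MaskFormer entry points at MaskFormerForInstanceSegmentation.
model = AutoModelForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# One forward pass supports semantic, instance, and panoptic post-processing.
result = image_processor.post_process_panoptic_segmentation(
    outputs, target_sizes=[image.size[::-1]]  # PIL size is (width, height)
)[0]
predicted_panoptic_map = result["segmentation"]  # (height, width) map of segment ids
```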
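PATCH 2/7 additionally registers the new mapping with the image-segmentation pipeline, so the same checkpoints load there without extra wiring. Another hedged sketch under the same assumptions:

```python
# Minimal pipeline sketch, assuming a build containing PATCH 2/7.
from transformers import pipeline

segmenter = pipeline("image-segmentation", model="facebook/maskformer-swin-base-coco")
segments = segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
# Each prediction carries a label, a confidence score, and a binary PIL mask.
print([(segment["label"], segment["score"]) for segment in segments])
```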