From ca8028b6465df1ec042117ba569a3dd0365d940c Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 14:31:14 +0100 Subject: [PATCH 1/7] Add mapping --- docs/source/en/model_doc/auto.mdx | 4 +++ src/transformers/__init__.py | 4 +++ src/transformers/models/auto/__init__.py | 4 +++ src/transformers/models/auto/modeling_auto.py | 20 ++++++++++++ .../models/maskformer/__init__.py | 4 +-- .../models/maskformer/modeling_maskformer.py | 31 +++++++++++++------ src/transformers/utils/dummy_pt_objects.py | 10 ++++++ 7 files changed, 65 insertions(+), 12 deletions(-) diff --git a/docs/source/en/model_doc/auto.mdx b/docs/source/en/model_doc/auto.mdx index 7957f453a2fb..b39920151db4 100644 --- a/docs/source/en/model_doc/auto.mdx +++ b/docs/source/en/model_doc/auto.mdx @@ -254,6 +254,10 @@ The following auto classes are available for the following computer vision tasks [[autodoc]] AutoModelForInstanceSegmentation +### AutoModelForUniversalSegmentation + +[[autodoc]] AutoModelForUniversalSegmentation + ### AutoModelForZeroShotObjectDetection [[autodoc]] AutoModelForZeroShotObjectDetection diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index e6758c7df38f..085528a51c55 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -943,6 +943,7 @@ "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", + "MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", @@ -974,6 +975,7 @@ "AutoModelForSpeechSeq2Seq", "AutoModelForTableQuestionAnswering", "AutoModelForTokenClassification", + "AutoModelForUniversalSegmentation", "AutoModelForVideoClassification", "AutoModelForVision2Seq", "AutoModelForVisualQuestionAnswering", @@ -4112,6 +4114,7 @@ MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, @@ -4143,6 +4146,7 @@ AutoModelForSpeechSeq2Seq, AutoModelForTableQuestionAnswering, AutoModelForTokenClassification, + AutoModelForUniversalSegmentation, AutoModelForVideoClassification, AutoModelForVision2Seq, AutoModelForVisualQuestionAnswering, diff --git a/src/transformers/models/auto/__init__.py b/src/transformers/models/auto/__init__.py index a6ee30366b39..da8ceb8e7e62 100644 --- a/src/transformers/models/auto/__init__.py +++ b/src/transformers/models/auto/__init__.py @@ -67,6 +67,7 @@ "MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING", "MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING", "MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING", + "MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING", "MODEL_FOR_VISION_2_SEQ_MAPPING", "MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING", @@ -97,6 +98,7 @@ "AutoModelForSpeechSeq2Seq", "AutoModelForTableQuestionAnswering", "AutoModelForTokenClassification", + "AutoModelForUniversalSegmentation", "AutoModelForVideoClassification", "AutoModelForVision2Seq", "AutoModelForVisualQuestionAnswering", @@ -222,6 +224,7 @@ MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING, MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING, @@ -253,6 +256,7 @@ 
AutoModelForSpeechSeq2Seq, AutoModelForTableQuestionAnswering, AutoModelForTokenClassification, + AutoModelForUniversalSegmentation, AutoModelForVideoClassification, AutoModelForVision2Seq, AutoModelForVisualQuestionAnswering, diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index c8dcc9aed1e7..ac4ff3e7df63 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -434,10 +434,18 @@ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict( [ # Model for Instance Segmentation mapping + # MaskFormerForInstanceSegmentation can be removed from this mapping in v5 ("maskformer", "MaskFormerForInstanceSegmentation"), ] ) +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict( + [ + # Model for Universal Segmentation mapping + ("maskformer", "MaskFormerForUniversalSegmentation"), + ] +) + MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict( [ ("timesformer", "TimesformerForVideoClassification"), @@ -891,6 +899,9 @@ MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES ) +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = _LazyAutoMapping( + CONFIG_MAPPING_NAMES, MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES +) MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = _LazyAutoMapping( CONFIG_MAPPING_NAMES, MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES ) @@ -1082,6 +1093,15 @@ class AutoModelForSemanticSegmentation(_BaseAutoModelClass): ) +class AutoModelForUniversalSegmentation(_BaseAutoModelClass): + _model_mapping = MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING + + +AutoModelForUniversalSegmentation = auto_class_update( + AutoModelForUniversalSegmentation, head_doc="universal image segmentation" +) + + class AutoModelForInstanceSegmentation(_BaseAutoModelClass): _model_mapping = MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index ba6452c7c405..9c07f86e8219 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -43,7 +43,7 @@ else: _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", - "MaskFormerForInstanceSegmentation", + "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", ] @@ -73,7 +73,7 @@ else: from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, - MaskFormerForInstanceSegmentation, + MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, ) diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 298d10879a2f..8846439034ba 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -16,6 +16,7 @@ import math import random +import warnings from dataclasses import dataclass from numbers import Number from typing import Dict, List, Optional, Tuple @@ -188,9 +189,9 @@ class MaskFormerModelOutput(ModelOutput): @dataclass -class MaskFormerForInstanceSegmentationOutput(ModelOutput): +class MaskFormerForUniversalSegmentationOutput(ModelOutput): """ - Class for outputs of [`MaskFormerForInstanceSegmentation`]. + Class for outputs of [`MaskFormerForUniversalSegmentation`]. 
This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_instance_segmentation`] or @@ -1633,7 +1634,7 @@ def forward( return output -class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): +class MaskFormerForUniversalSegmentation(MaskFormerPreTrainedModel): def __init__(self, config: MaskFormerConfig): super().__init__(config) self.model = MaskFormerModel(config) @@ -1715,7 +1716,7 @@ def get_logits(self, outputs: MaskFormerModelOutput) -> Tuple[Tensor, Tensor, Di return class_queries_logits, masks_queries_logits, auxiliary_logits @add_start_docstrings_to_model_forward(MASKFORMER_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=MaskFormerForInstanceSegmentationOutput, config_class=_CONFIG_FOR_DOC) + @replace_return_docstrings(output_type=MaskFormerForUniversalSegmentationOutput, config_class=_CONFIG_FOR_DOC) def forward( self, pixel_values: Tensor, @@ -1726,7 +1727,7 @@ def forward( output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> MaskFormerForInstanceSegmentationOutput: + ) -> MaskFormerForUniversalSegmentationOutput: r""" mask_labels (`List[torch.Tensor]`, *optional*): List of mask labels of shape `(num_labels, height, width)` to be fed to a model @@ -1741,13 +1742,13 @@ def forward( Semantic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade") - >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") + >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") >>> url = ( ...
"https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" @@ -1774,13 +1775,13 @@ def forward( Panoptic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on COCO panoptic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco") - >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") + >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1832,7 +1833,7 @@ def forward( if not output_auxiliary_logits: auxiliary_logits = None - output = MaskFormerForInstanceSegmentationOutput( + output = MaskFormerForUniversalSegmentationOutput( loss=loss, **outputs, class_queries_logits=class_queries_logits, @@ -1845,3 +1846,13 @@ def forward( if loss is not None: output = ((loss)) + output return output + + +class MaskFormerForInstanceSegmentation(MaskFormerForUniversalSegmentation): + def __init__(self, *args, **kwargs) -> None: + warnings.warn( + "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" + " Transformers. Please use MaskFormerForUniversalSegmentation instead.", + FutureWarning, + ) + super().__init__(*args, **kwargs) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index e8fcfa496932..1176f94cec33 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -446,6 +446,9 @@ def __init__(self, *args, **kwargs): MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING = None +MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING = None + + MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING = None @@ -639,6 +642,13 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class AutoModelForUniversalSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class AutoModelForVideoClassification(metaclass=DummyObject): _backends = ["torch"] From 6903b8cf8bac39da72ad4d626e05d07a06353dc5 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 14:37:16 +0100 Subject: [PATCH 2/7] Add mapping to pipeline --- src/transformers/pipelines/image_segmentation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transformers/pipelines/image_segmentation.py b/src/transformers/pipelines/image_segmentation.py index 55b2217ccde2..9fdb0dc3314d 100644 --- a/src/transformers/pipelines/image_segmentation.py +++ b/src/transformers/pipelines/image_segmentation.py @@ -16,6 +16,7 @@ MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING, MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING, + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING, ) @@ -75,6 +76,7 @@ def __init__(self, *args, **kwargs): MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items() + MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() + MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() + + MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING.items() ) ) From 0573266077010f1ac3e4ad7351e566ef997ffe69 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 19:31:09 +0100 Subject: [PATCH 3/7] Apply 
suggestions --- docs/source/en/model_doc/maskformer.mdx | 8 +++--- src/transformers/__init__.py | 2 ++ .../models/maskformer/__init__.py | 2 ++ .../maskformer/configuration_maskformer.py | 2 +- .../maskformer/image_processing_maskformer.py | 26 +++++++++---------- src/transformers/utils/dummy_pt_objects.py | 7 +++++ .../maskformer/test_modeling_maskformer.py | 18 ++++++------- utils/check_repo.py | 2 ++ 8 files changed, 40 insertions(+), 27 deletions(-) diff --git a/docs/source/en/model_doc/maskformer.mdx b/docs/source/en/model_doc/maskformer.mdx index 4060cbab9a8f..1f095cdf3463 100644 --- a/docs/source/en/model_doc/maskformer.mdx +++ b/docs/source/en/model_doc/maskformer.mdx @@ -33,7 +33,7 @@ Tips: `get_num_masks` function inside in the `MaskFormerLoss` class of `modeling_maskformer.py`. When training on multiple nodes, this should be set to the average number of target masks across all nodes, as can be seen in the original implementation [here](https://github.com/facebookresearch/MaskFormer/blob/da3e60d85fdeedcb31476b5edd7d328826ce56cc/mask_former/modeling/criterion.py#L169). - One can use [`MaskFormerImageProcessor`] to prepare images for the model and optional targets for the model. -- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForInstanceSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. +- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForUniversalSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. The figure below illustrates the architecture of MaskFormer. Taken from the [original paper](https://arxiv.org/abs/2107.06278). @@ -51,7 +51,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). The [[autodoc]] models.maskformer.modeling_maskformer.MaskFormerModelOutput -[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput +[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForUniversalSegmentationOutput ## MaskFormerConfig @@ -80,7 +80,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). 
The [[autodoc]] MaskFormerModel - forward -## MaskFormerForInstanceSegmentation +## MaskFormerForUniversalSegmentation -[[autodoc]] MaskFormerForInstanceSegmentation +[[autodoc]] MaskFormerForUniversalSegmentation - forward diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 085528a51c55..019f709094fd 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1661,6 +1661,7 @@ [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", + "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", "MaskFormerSwinBackbone", @@ -4704,6 +4705,7 @@ from .models.maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, + MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, MaskFormerSwinBackbone, diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index 9c07f86e8219..c6950ddaeef7 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -43,6 +43,7 @@ else: _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", + "MaskFormerForInstanceSegmentation", "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", @@ -73,6 +74,7 @@ else: from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, + MaskFormerForInstanceSegmentation, MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index 655bee2b9a5f..d871fc2ab88d 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -53,7 +53,7 @@ class MaskFormerConfig(PretrainedConfig): no_object_weight (`float`, *optional*, defaults to 0.1): Weight to apply to the null (no object) class. use_auxiliary_loss(`bool`, *optional*, defaults to `False`): - If `True` [`MaskFormerForInstanceSegmentationOutput`] will contain the auxiliary losses computed using the + If `True` [`MaskFormerForUniversalSegmentationOutput`] will contain the auxiliary losses computed using the logits from each decoder's stage. backbone_config (`Dict`, *optional*): The configuration passed to the backbone, if unset, the configuration corresponding to diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 50cef6070028..b87854a25f54 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: - from transformers import MaskFormerForInstanceSegmentationOutput + from transformers import MaskFormerForUniversalSegmentationOutput if is_torch_available(): @@ -872,15 +872,15 @@ def encode_inputs( return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForUniversalSegmentationOutput", target_size: Tuple[int, int] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image segmentation predictions. 
Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentationOutput`]): - The outputs from [`MaskFormerForInstanceSegmentation`]. + outputs ([`MaskFormerForUniversalSegmentationOutput`]): + The outputs from [`MaskFormerForUniversalSegmentation`]. target_size (`Tuple[int, int]`, *optional*): If set, the `masks_queries_logits` will be resized to `target_size`. @@ -923,11 +923,11 @@ def post_process_semantic_segmentation( self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForInstanceSegmentation`] into semantic segmentation maps. Only supports + Converts the output of [`MaskFormerForUniversalSegmentation`] into semantic segmentation maps. Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentation`]): + outputs ([`MaskFormerForUniversalSegmentation`]): Raw outputs of the model. target_sizes (`List[Tuple[int, int]]`, *optional*): List of length (batch_size), where each list item (`Tuple[int, int]]`) corresponds to the requested @@ -979,11 +979,11 @@ def post_process_instance_segmentation( return_coco_annotation: Optional[bool] = False, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only - supports PyTorch. + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into instance segmentation predictions. + Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentation`]): + outputs ([`MaskFormerForUniversalSegmentation`]): Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. @@ -1062,12 +1062,12 @@ def post_process_panoptic_segmentation( target_sizes: Optional[List[Tuple[int, int]]] = None, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image panoptic segmentation + Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image panoptic segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForInstanceSegmentationOutput`]): - The outputs from [`MaskFormerForInstanceSegmentation`]. + outputs ([`MaskFormerForUniversalSegmentationOutput`]): + The outputs from [`MaskFormerForUniversalSegmentation`]. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. 
mask_threshold (`float`, *optional*, defaults to 0.5): diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index 1176f94cec33..edcef5050b8e 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -3450,6 +3450,13 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) +class MaskFormerForUniversalSegmentation(metaclass=DummyObject): + _backends = ["torch"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["torch"]) + + class MaskFormerModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index 52c811591bba..4e834d1fdbee 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -31,7 +31,7 @@ if is_torch_available(): import torch - from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel + from transformers import MaskFormerForUniversalSegmentation, MaskFormerModel if is_vision_available(): from transformers import MaskFormerFeatureExtractor @@ -135,7 +135,7 @@ def create_and_check_maskformer_model(self, config, pixel_values, pixel_mask, ou def create_and_check_maskformer_instance_segmentation_head_model( self, config, pixel_values, pixel_mask, mask_labels, class_labels ): - model = MaskFormerForInstanceSegmentation(config=config) + model = MaskFormerForUniversalSegmentation(config=config) model.to(torch_device) model.eval() @@ -174,7 +174,7 @@ def comm_check_on_output(result): @require_torch class MaskFormerModelTest(ModelTesterMixin, unittest.TestCase): - all_model_classes = (MaskFormerModel, MaskFormerForInstanceSegmentation) if is_torch_available() else () + all_model_classes = (MaskFormerModel, MaskFormerForUniversalSegmentation) if is_torch_available() else () is_encoder_decoder = False test_pruning = False @@ -245,7 +245,7 @@ def test_model_with_labels(self): "class_labels": torch.zeros(2, 10, device=torch_device).long(), } - model = MaskFormerForInstanceSegmentation(MaskFormerConfig()).to(torch_device) + model = MaskFormerForUniversalSegmentation(MaskFormerConfig()).to(torch_device) outputs = model(**inputs) self.assertTrue(outputs.loss is not None) @@ -264,7 +264,7 @@ def test_attention_outputs(self): def test_training(self): if not self.model_tester.is_training: return - # only MaskFormerForInstanceSegmentation has the loss + # only MaskFormerForUniversalSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() @@ -276,7 +276,7 @@ def test_training(self): loss.backward() def test_retain_grad_hidden_states_attentions(self): - # only MaskFormerForInstanceSegmentation has the loss + # only MaskFormerForUniversalSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() config.output_hidden_states = True @@ -371,7 +371,7 @@ def test_inference_no_head(self): def test_inference_instance_segmentation_head(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) @@ -415,7 +415,7 @@ def test_inference_instance_segmentation_head(self): def 
test_inference_instance_segmentation_head_resnet_backbone(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") .to(torch_device) .eval() ) @@ -451,7 +451,7 @@ def test_inference_instance_segmentation_head_resnet_backbone(self): def test_with_segmentation_maps_and_loss(self): model = ( - MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) diff --git a/utils/check_repo.py b/utils/check_repo.py index c72c089d7906..07a67222a5b1 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -49,6 +49,7 @@ # Being in this list is an exception and should **not** be the rule. IGNORE_NON_TESTED = PRIVATE_MODELS.copy() + [ # models to ignore for not tested + "MaskFormerForInstanceSegmentation", # This class name is deprecated, MaskFormerForUniversalSegmentation is tested "CLIPSegDecoder", # Building part of bigger (tested) model. "TableTransformerEncoder", # Building part of bigger (tested) model. "TableTransformerDecoder", # Building part of bigger (tested) model. @@ -614,6 +615,7 @@ def find_all_documented_objects(): "LineByLineTextDataset", "LineByLineWithRefDataset", "LineByLineWithSOPTextDataset", + "MaskFormerForInstanceSegmentation", "PretrainedBartModel", "PretrainedFSMTModel", "SingleSentenceClassificationProcessor", From c03702e9ab662dd23a7c33ed8ed1d7dfa52437bd Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Wed, 14 Dec 2022 20:11:25 +0100 Subject: [PATCH 4/7] Fix feature extractor tests --- tests/models/maskformer/test_feature_extraction_maskformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index ca2f504c06c8..9285a81d50a7 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -32,7 +32,7 @@ if is_vision_available(): from transformers import MaskFormerFeatureExtractor from transformers.models.maskformer.image_processing_maskformer import binary_mask_to_rle - from transformers.models.maskformer.modeling_maskformer import MaskFormerForInstanceSegmentationOutput + from transformers.models.maskformer.modeling_maskformer import MaskFormerForUniversalSegmentationOutput if is_vision_available(): from PIL import Image @@ -121,7 +121,7 @@ def get_expected_values(self, image_inputs, batched=False): return expected_height, expected_width def get_fake_maskformer_outputs(self): - return MaskFormerForInstanceSegmentationOutput( + return MaskFormerForUniversalSegmentationOutput( # +1 for null class class_queries_logits=torch.randn((self.batch_size, self.num_queries, self.num_classes + 1)), masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), From 99a86905d8f4b33e4585a3a857801343f9589f4d Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 09:33:02 +0100 Subject: [PATCH 5/7] Use ForInstance, add model to universal mapping --- docs/source/en/model_doc/maskformer.mdx | 8 +++--- src/transformers/__init__.py | 2 -- src/transformers/models/auto/modeling_auto.py | 3 ++- .../models/maskformer/__init__.py | 2 -- .../maskformer/configuration_maskformer.py | 2 +- .../maskformer/image_processing_maskformer.py 
| 26 +++++++++---------- .../models/maskformer/modeling_maskformer.py | 24 ++++++++--------- src/transformers/utils/dummy_pt_objects.py | 7 ----- .../test_feature_extraction_maskformer.py | 4 +-- .../maskformer/test_modeling_maskformer.py | 18 ++++++------- 10 files changed, 43 insertions(+), 53 deletions(-) diff --git a/docs/source/en/model_doc/maskformer.mdx b/docs/source/en/model_doc/maskformer.mdx index 1f095cdf3463..4060cbab9a8f 100644 --- a/docs/source/en/model_doc/maskformer.mdx +++ b/docs/source/en/model_doc/maskformer.mdx @@ -33,7 +33,7 @@ Tips: `get_num_masks` function inside in the `MaskFormerLoss` class of `modeling_maskformer.py`. When training on multiple nodes, this should be set to the average number of target masks across all nodes, as can be seen in the original implementation [here](https://github.com/facebookresearch/MaskFormer/blob/da3e60d85fdeedcb31476b5edd7d328826ce56cc/mask_former/modeling/criterion.py#L169). - One can use [`MaskFormerImageProcessor`] to prepare images for the model and optional targets for the model. -- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForUniversalSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. +- To get the final segmentation, depending on the task, you can call [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`]. Both tasks can be solved using [`MaskFormerForInstanceSegmentation`] output, panoptic segmentation accepts an optional `label_ids_to_fuse` argument to fuse instances of the target object/s (e.g. sky) together. The figure below illustrates the architecture of MaskFormer. Taken from the [original paper](https://arxiv.org/abs/2107.06278). @@ -51,7 +51,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). The [[autodoc]] models.maskformer.modeling_maskformer.MaskFormerModelOutput -[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForUniversalSegmentationOutput +[[autodoc]] models.maskformer.modeling_maskformer.MaskFormerForInstanceSegmentationOutput ## MaskFormerConfig @@ -80,7 +80,7 @@ This model was contributed by [francesco](https://huggingface.co/francesco). 
The [[autodoc]] MaskFormerModel - forward -## MaskFormerForUniversalSegmentation +## MaskFormerForInstanceSegmentation -[[autodoc]] MaskFormerForUniversalSegmentation +[[autodoc]] MaskFormerForInstanceSegmentation - forward diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py index 019f709094fd..085528a51c55 100644 --- a/src/transformers/__init__.py +++ b/src/transformers/__init__.py @@ -1661,7 +1661,6 @@ [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", - "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", "MaskFormerSwinBackbone", @@ -4705,7 +4704,6 @@ from .models.maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, - MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, MaskFormerSwinBackbone, diff --git a/src/transformers/models/auto/modeling_auto.py b/src/transformers/models/auto/modeling_auto.py index ac4ff3e7df63..ec25db48e233 100644 --- a/src/transformers/models/auto/modeling_auto.py +++ b/src/transformers/models/auto/modeling_auto.py @@ -442,7 +442,8 @@ MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict( [ # Model for Universal Segmentation mapping - ("maskformer", "MaskFormerForUniversalSegmentation"), + ("detr", "DetrForSegmentation"), + ("maskformer", "MaskFormerForInstanceSegmentation"), ] ) diff --git a/src/transformers/models/maskformer/__init__.py b/src/transformers/models/maskformer/__init__.py index c6950ddaeef7..ba6452c7c405 100644 --- a/src/transformers/models/maskformer/__init__.py +++ b/src/transformers/models/maskformer/__init__.py @@ -44,7 +44,6 @@ _import_structure["modeling_maskformer"] = [ "MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST", "MaskFormerForInstanceSegmentation", - "MaskFormerForUniversalSegmentation", "MaskFormerModel", "MaskFormerPreTrainedModel", ] @@ -75,7 +74,6 @@ from .modeling_maskformer import ( MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST, MaskFormerForInstanceSegmentation, - MaskFormerForUniversalSegmentation, MaskFormerModel, MaskFormerPreTrainedModel, ) diff --git a/src/transformers/models/maskformer/configuration_maskformer.py b/src/transformers/models/maskformer/configuration_maskformer.py index d871fc2ab88d..655bee2b9a5f 100644 --- a/src/transformers/models/maskformer/configuration_maskformer.py +++ b/src/transformers/models/maskformer/configuration_maskformer.py @@ -53,7 +53,7 @@ class MaskFormerConfig(PretrainedConfig): no_object_weight (`float`, *optional*, defaults to 0.1): Weight to apply to the null (no object) class. use_auxiliary_loss(`bool`, *optional*, defaults to `False`): - If `True` [`MaskFormerForUniversalSegmentationOutput`] will contain the auxiliary losses computed using the + If `True` [`MaskFormerForInstanceSegmentationOutput`] will contain the auxiliary losses computed using the logits from each decoder's stage. 
backbone_config (`Dict`, *optional*): The configuration passed to the backbone, if unset, the configuration corresponding to diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index b87854a25f54..50cef6070028 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -54,7 +54,7 @@ if TYPE_CHECKING: - from transformers import MaskFormerForUniversalSegmentationOutput + from transformers import MaskFormerForInstanceSegmentationOutput if is_torch_available(): @@ -872,15 +872,15 @@ def encode_inputs( return encoded_inputs def post_process_segmentation( - self, outputs: "MaskFormerForUniversalSegmentationOutput", target_size: Tuple[int, int] = None + self, outputs: "MaskFormerForInstanceSegmentationOutput", target_size: Tuple[int, int] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image segmentation predictions. Only + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentationOutput`]): - The outputs from [`MaskFormerForUniversalSegmentation`]. + outputs ([`MaskFormerForInstanceSegmentationOutput`]): + The outputs from [`MaskFormerForInstanceSegmentation`]. target_size (`Tuple[int, int]`, *optional*): If set, the `masks_queries_logits` will be resized to `target_size`. @@ -923,11 +923,11 @@ def post_process_semantic_segmentation( self, outputs, target_sizes: Optional[List[Tuple[int, int]]] = None ) -> "torch.Tensor": """ - Converts the output of [`MaskFormerForUniversalSegmentation`] into semantic segmentation maps. Only supports + Converts the output of [`MaskFormerForInstanceSegmentation`] into semantic segmentation maps. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentation`]): + outputs ([`MaskFormerForInstanceSegmentation`]): Raw outputs of the model. target_sizes (`List[Tuple[int, int]]`, *optional*): List of length (batch_size), where each list item (`Tuple[int, int]]`) corresponds to the requested @@ -979,11 +979,11 @@ def post_process_instance_segmentation( return_coco_annotation: Optional[bool] = False, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into instance segmentation predictions. - Only supports PyTorch. + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into instance segmentation predictions. Only + supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentation`]): + outputs ([`MaskFormerForInstanceSegmentation`]): Raw outputs of the model. threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. @@ -1062,12 +1062,12 @@ def post_process_panoptic_segmentation( target_sizes: Optional[List[Tuple[int, int]]] = None, ) -> List[Dict]: """ - Converts the output of [`MaskFormerForUniversalSegmentationOutput`] into image panoptic segmentation + Converts the output of [`MaskFormerForInstanceSegmentationOutput`] into image panoptic segmentation predictions. Only supports PyTorch. Args: - outputs ([`MaskFormerForUniversalSegmentationOutput`]): - The outputs from [`MaskFormerForUniversalSegmentation`]. + outputs ([`MaskFormerForInstanceSegmentationOutput`]): + The outputs from [`MaskFormerForInstanceSegmentation`]. 
threshold (`float`, *optional*, defaults to 0.5): The probability score threshold to keep predicted instance masks. mask_threshold (`float`, *optional*, defaults to 0.5): diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 8846439034ba..61ab637bb8f3 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -189,9 +189,9 @@ class MaskFormerModelOutput(ModelOutput): @dataclass -class MaskFormerForUniversalSegmentationOutput(ModelOutput): +class MaskFormerForInstanceSegmentationOutput(ModelOutput): """ - Class for outputs of [`MaskFormerForUniversalSegmentation`]. + Class for outputs of [`MaskFormerForInstanceSegmentation`]. This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or [`~MaskFormerImageProcessor.post_process_instance_segmentation`] or @@ -1634,7 +1634,7 @@ def forward( return output -class MaskFormerForUniversalSegmentation(MaskFormerPreTrainedModel): +class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): def __init__(self, config: MaskFormerConfig): super().__init__(config) self.model = MaskFormerModel(config) @@ -1716,7 +1716,7 @@ def get_logits(self, outputs: MaskFormerModelOutput) -> Tuple[Tensor, Tensor, Di return class_queries_logits, masks_queries_logits, auxiliary_logits @add_start_docstrings_to_model_forward(MASKFORMER_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=MaskFormerForUniversalSegmentationOutput, config_class=_CONFIG_FOR_DOC) + @replace_return_docstrings(output_type=MaskFormerForInstanceSegmentationOutput, config_class=_CONFIG_FOR_DOC) def forward( self, pixel_values: Tensor, @@ -1727,7 +1727,7 @@ def forward( output_hidden_states: Optional[bool] = None, output_attentions: Optional[bool] = None, return_dict: Optional[bool] = None, - ) -> MaskFormerForUniversalSegmentationOutput: + ) -> MaskFormerForInstanceSegmentationOutput: r""" mask_labels (`List[torch.Tensor]`, *optional*): List of mask labels of shape `(num_labels, height, width)` to be fed to a model @@ -1742,13 +1742,13 @@ def forward( Semantic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade") - >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") + >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") >>> url = ( ...
"https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" @@ -1775,13 +1775,13 @@ def forward( Panoptic segmentation example: ```python - >>> from transformers import MaskFormerImageProcessor, MaskFormerForUniversalSegmentation + >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation >>> from PIL import Image >>> import requests >>> # load MaskFormer fine-tuned on COCO panoptic segmentation >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco") - >>> model = MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") + >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) @@ -1833,7 +1833,7 @@ def forward( if not output_auxiliary_logits: auxiliary_logits = None - output = MaskFormerForUniversalSegmentationOutput( + output = MaskFormerForInstanceSegmentationOutput( loss=loss, **outputs, class_queries_logits=class_queries_logits, @@ -1848,11 +1848,11 @@ def forward( return output -class MaskFormerForInstanceSegmentation(MaskFormerForUniversalSegmentation): +class MaskFormerForInstanceSegmentation(MaskFormerForInstanceSegmentation): def __init__(self, *args, **kwargs) -> None: warnings.warn( "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" - " Transformers. Please use MaskFormerForUniversalSegmentation instead.", + " Transformers. Please use MaskFormerForInstanceSegmentation instead.", FutureWarning, ) super().__init__(*args, **kwargs) diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py index edcef5050b8e..1176f94cec33 100644 --- a/src/transformers/utils/dummy_pt_objects.py +++ b/src/transformers/utils/dummy_pt_objects.py @@ -3450,13 +3450,6 @@ def __init__(self, *args, **kwargs): requires_backends(self, ["torch"]) -class MaskFormerForUniversalSegmentation(metaclass=DummyObject): - _backends = ["torch"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["torch"]) - - class MaskFormerModel(metaclass=DummyObject): _backends = ["torch"] diff --git a/tests/models/maskformer/test_feature_extraction_maskformer.py b/tests/models/maskformer/test_feature_extraction_maskformer.py index 9285a81d50a7..ca2f504c06c8 100644 --- a/tests/models/maskformer/test_feature_extraction_maskformer.py +++ b/tests/models/maskformer/test_feature_extraction_maskformer.py @@ -32,7 +32,7 @@ if is_vision_available(): from transformers import MaskFormerFeatureExtractor from transformers.models.maskformer.image_processing_maskformer import binary_mask_to_rle - from transformers.models.maskformer.modeling_maskformer import MaskFormerForUniversalSegmentationOutput + from transformers.models.maskformer.modeling_maskformer import MaskFormerForInstanceSegmentationOutput if is_vision_available(): from PIL import Image @@ -121,7 +121,7 @@ def get_expected_values(self, image_inputs, batched=False): return expected_height, expected_width def get_fake_maskformer_outputs(self): - return MaskFormerForUniversalSegmentationOutput( + return MaskFormerForInstanceSegmentationOutput( # +1 for null class class_queries_logits=torch.randn((self.batch_size, self.num_queries, self.num_classes + 1)), masks_queries_logits=torch.randn((self.batch_size, self.num_queries, self.height, self.width)), diff --git 
a/tests/models/maskformer/test_modeling_maskformer.py b/tests/models/maskformer/test_modeling_maskformer.py index 4e834d1fdbee..52c811591bba 100644 --- a/tests/models/maskformer/test_modeling_maskformer.py +++ b/tests/models/maskformer/test_modeling_maskformer.py @@ -31,7 +31,7 @@ if is_torch_available(): import torch - from transformers import MaskFormerForUniversalSegmentation, MaskFormerModel + from transformers import MaskFormerForInstanceSegmentation, MaskFormerModel if is_vision_available(): from transformers import MaskFormerFeatureExtractor @@ -135,7 +135,7 @@ def create_and_check_maskformer_model(self, config, pixel_values, pixel_mask, ou def create_and_check_maskformer_instance_segmentation_head_model( self, config, pixel_values, pixel_mask, mask_labels, class_labels ): - model = MaskFormerForUniversalSegmentation(config=config) + model = MaskFormerForInstanceSegmentation(config=config) model.to(torch_device) model.eval() @@ -174,7 +174,7 @@ def comm_check_on_output(result): @require_torch class MaskFormerModelTest(ModelTesterMixin, unittest.TestCase): - all_model_classes = (MaskFormerModel, MaskFormerForUniversalSegmentation) if is_torch_available() else () + all_model_classes = (MaskFormerModel, MaskFormerForInstanceSegmentation) if is_torch_available() else () is_encoder_decoder = False test_pruning = False @@ -245,7 +245,7 @@ def test_model_with_labels(self): "class_labels": torch.zeros(2, 10, device=torch_device).long(), } - model = MaskFormerForUniversalSegmentation(MaskFormerConfig()).to(torch_device) + model = MaskFormerForInstanceSegmentation(MaskFormerConfig()).to(torch_device) outputs = model(**inputs) self.assertTrue(outputs.loss is not None) @@ -264,7 +264,7 @@ def test_attention_outputs(self): def test_training(self): if not self.model_tester.is_training: return - # only MaskFormerForUniversalSegmentation has the loss + # only MaskFormerForInstanceSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() @@ -276,7 +276,7 @@ def test_training(self): loss.backward() def test_retain_grad_hidden_states_attentions(self): - # only MaskFormerForUniversalSegmentation has the loss + # only MaskFormerForInstanceSegmentation has the loss model_class = self.all_model_classes[1] config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs() config.output_hidden_states = True @@ -371,7 +371,7 @@ def test_inference_no_head(self): def test_inference_instance_segmentation_head(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) @@ -415,7 +415,7 @@ def test_inference_instance_segmentation_head(self): def test_inference_instance_segmentation_head_resnet_backbone(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff") .to(torch_device) .eval() ) @@ -451,7 +451,7 @@ def test_inference_instance_segmentation_head_resnet_backbone(self): def test_with_segmentation_maps_and_loss(self): model = ( - MaskFormerForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") + MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-small-coco") .to(torch_device) .eval() ) From 
8706d43b806ec3271e251681b4ddfd170730b197 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 09:36:13 +0100 Subject: [PATCH 6/7] More fixes --- .../models/maskformer/modeling_maskformer.py | 11 ----------- utils/check_repo.py | 2 -- 2 files changed, 13 deletions(-) diff --git a/src/transformers/models/maskformer/modeling_maskformer.py b/src/transformers/models/maskformer/modeling_maskformer.py index 61ab637bb8f3..298d10879a2f 100644 --- a/src/transformers/models/maskformer/modeling_maskformer.py +++ b/src/transformers/models/maskformer/modeling_maskformer.py @@ -16,7 +16,6 @@ import math import random -import warnings from dataclasses import dataclass from numbers import Number from typing import Dict, List, Optional, Tuple @@ -1846,13 +1845,3 @@ def forward( if loss is not None: output = ((loss)) + output return output - - -class MaskFormerForInstanceSegmentation(MaskFormerForInstanceSegmentation): - def __init__(self, *args, **kwargs) -> None: - warnings.warn( - "The class MaskFormerForInstanceSegmentation is deprecated and will be removed in version 5 of" - " Transformers. Please use MaskFormerForInstanceSegmentation instead.", - FutureWarning, - ) - super().__init__(*args, **kwargs) diff --git a/utils/check_repo.py b/utils/check_repo.py index 07a67222a5b1..da16270acfeb 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -49,7 +49,6 @@ # Being in this list is an exception and should **not** be the rule. IGNORE_NON_TESTED = PRIVATE_MODELS.copy() + [ # models to ignore for not tested - "MaskFormerForInstanceSegmentation", # This class name is deprecated, MaskFormerForUniversalSegmentation is tested "CLIPSegDecoder", # Building part of bigger (tested) model. "TableTransformerEncoder", # Building part of bigger (tested) model. "TableTransformerDecoder", # Building part of bigger (tested) model. @@ -240,7 +239,6 @@ "VisualBertForMultipleChoice", "TFWav2Vec2ForCTC", "TFHubertForCTC", - "MaskFormerForInstanceSegmentation", "XCLIPVisionModel", "XCLIPTextModel", ] From ab69b1f0e4df2a703f04501555f4a9fac5c44018 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Fri, 16 Dec 2022 12:49:39 +0100 Subject: [PATCH 7/7] Remove model from deprecated objects --- utils/check_repo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/check_repo.py b/utils/check_repo.py index da16270acfeb..1c2fd4b45c41 100644 --- a/utils/check_repo.py +++ b/utils/check_repo.py @@ -613,7 +613,6 @@ def find_all_documented_objects(): "LineByLineTextDataset", "LineByLineWithRefDataset", "LineByLineWithSOPTextDataset", - "MaskFormerForInstanceSegmentation", "PretrainedBartModel", "PretrainedFSMTModel", "SingleSentenceClassificationProcessor",
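
Taken together, the series exposes universal segmentation checkpoints through a dedicated auto class while keeping the public MaskFormer class name unchanged. The sketch below is illustrative only and not part of the patches: it assumes a Transformers build that includes these commits, and it reuses the `facebook/maskformer-swin-base-coco` checkpoint and the `MaskFormerImageProcessor` shown in the docstring examples above.

```python
# Minimal sketch, assuming a build containing PATCH 1/7 and PATCH 5/7.
import requests
import torch
from PIL import Image

from transformers import AutoModelForUniversalSegmentation, MaskFormerImageProcessor

image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco")
# The auto class resolves the config through MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING;
# after PATCH 5/7 the MaskFormer entry points at MaskFormerForInstanceSegmentation.
model = AutoModelForUniversalSegmentation.from_pretrained("facebook/maskformer-swin-base-coco")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = image_processor(images=image, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# One forward pass supports semantic, instance, and panoptic post-processing.
result = image_processor.post_process_panoptic_segmentation(
    outputs, target_sizes=[image.size[::-1]]  # PIL size is (width, height)
)[0]
predicted_panoptic_map = result["segmentation"]  # (height, width) map of segment ids
```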
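PATCH 2/7 additionally registers the new mapping with the image-segmentation pipeline, so the same checkpoints load there without extra wiring. Another hedged sketch under the same assumptions:

```python
# Minimal pipeline sketch, assuming a build containing PATCH 2/7.
from transformers import pipeline

segmenter = pipeline("image-segmentation", model="facebook/maskformer-swin-base-coco")
segments = segmenter("http://images.cocodataset.org/val2017/000000039769.jpg")
# Each prediction carries a label, a confidence score, and a binary PIL mask.
print([(segment["label"], segment["score"]) for segment in segments])
```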