From 471cb96e4ae5fcd90e22be6ed2212e557c31d401 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Thu, 8 Dec 2022 14:11:08 +0100 Subject: [PATCH 1/5] Improve ResNet backbone --- src/transformers/models/resnet/modeling_resnet.py | 4 ++-- tests/models/resnet/test_modeling_resnet.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/resnet/modeling_resnet.py b/src/transformers/models/resnet/modeling_resnet.py index c3d65ddc05e6..2696c56cf25c 100644 --- a/src/transformers/models/resnet/modeling_resnet.py +++ b/src/transformers/models/resnet/modeling_resnet.py @@ -267,7 +267,7 @@ def _init_weights(self, module): nn.init.constant_(module.bias, 0) def _set_gradient_checkpointing(self, module, value=False): - if isinstance(module, (ResNetModel, ResNetBackbone)): + if isinstance(module, ResNetEncoder): module.gradient_checkpointing = value @@ -439,7 +439,7 @@ def __init__(self, config): self.embedder = ResNetEmbeddings(config) self.encoder = ResNetEncoder(config) - self.out_features = config.out_features + self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] out_feature_channels = {} out_feature_channels["stem"] = config.embedding_size diff --git a/tests/models/resnet/test_modeling_resnet.py b/tests/models/resnet/test_modeling_resnet.py index 53777d27c84d..f9fca537a340 100644 --- a/tests/models/resnet/test_modeling_resnet.py +++ b/tests/models/resnet/test_modeling_resnet.py @@ -119,7 +119,7 @@ def create_and_check_backbone(self, config, pixel_values, labels): model.eval() result = model(pixel_values) - # verify hidden states + # verify feature maps self.parent.assertEqual(len(result.feature_maps), len(config.out_features)) self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[1], 4, 4]) @@ -127,6 +127,16 @@ def create_and_check_backbone(self, config, pixel_values, labels): self.parent.assertEqual(len(model.channels), len(config.out_features)) self.parent.assertListEqual(model.channels, config.hidden_sizes[1:]) + # verify backbone works with out_features=None + config.out_features = None + model = ResNetBackbone(config=config) + model.to(torch_device) + model.eval() + result = model(pixel_values) + + self.parent.assertEqual(len(result.feature_maps), 1) + self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[-1], 1, 1]) + def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() config, pixel_values, labels = config_and_inputs From ec3624b42410d919387f465e4179250be083b31a Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Thu, 8 Dec 2022 14:13:33 +0100 Subject: [PATCH 2/5] Improve Bit backbone --- src/transformers/models/bit/modeling_bit.py | 2 +- tests/models/bit/test_modeling_bit.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index df85add18c22..31cc61e2a0c3 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -851,7 +851,7 @@ def __init__(self, config): self.stage_names = config.stage_names self.bit = BitModel(config) - self.out_features = config.out_features + self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] out_feature_channels = {} out_feature_channels["stem"] = config.embedding_size diff --git a/tests/models/bit/test_modeling_bit.py b/tests/models/bit/test_modeling_bit.py index 0c3bf147c890..34c200858531 100644 --- a/tests/models/bit/test_modeling_bit.py +++ b/tests/models/bit/test_modeling_bit.py @@ -119,7 +119,7 @@ def create_and_check_backbone(self, config, pixel_values, labels): model.eval() result = model(pixel_values) - # verify hidden states + # verify feature maps self.parent.assertEqual(len(result.feature_maps), len(config.out_features)) self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[1], 4, 4]) @@ -127,6 +127,16 @@ def create_and_check_backbone(self, config, pixel_values, labels): self.parent.assertEqual(len(model.channels), len(config.out_features)) self.parent.assertListEqual(model.channels, config.hidden_sizes[1:]) + # verify backbone works with out_features=None + config.out_features = None + model = BitBackbone(config=config) + model.to(torch_device) + model.eval() + result = model(pixel_values) + + self.parent.assertEqual(len(result.feature_maps), 1) + self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[-1], 1, 1]) + def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() config, pixel_values, labels = config_and_inputs From d268c336abef6619cd8d44bc29b595b2a2fae301 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Thu, 8 Dec 2022 14:17:52 +0100 Subject: [PATCH 3/5] Improve docstrings --- src/transformers/models/bit/configuration_bit.py | 2 +- .../models/maskformer/configuration_maskformer_swin.py | 4 ++-- .../models/maskformer/modeling_maskformer_swin.py | 2 +- src/transformers/models/resnet/configuration_resnet.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transformers/models/bit/configuration_bit.py b/src/transformers/models/bit/configuration_bit.py index 6418549ab876..ef0f6b7fdaa6 100644 --- a/src/transformers/models/bit/configuration_bit.py +++ b/src/transformers/models/bit/configuration_bit.py @@ -63,7 +63,7 @@ class BitConfig(PretrainedConfig): The width factor for the model. out_features (`List[str]`, *optional*): If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. - (depending on how many stages the model has). + (depending on how many stages the model has). Defaults to the last stage in case of `None`. Example: ```python diff --git a/src/transformers/models/maskformer/configuration_maskformer_swin.py b/src/transformers/models/maskformer/configuration_maskformer_swin.py index 4c9f1a4ca4df..b9c72fbf3380 100644 --- a/src/transformers/models/maskformer/configuration_maskformer_swin.py +++ b/src/transformers/models/maskformer/configuration_maskformer_swin.py @@ -68,8 +68,8 @@ class MaskFormerSwinConfig(PretrainedConfig): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. layer_norm_eps (`float`, *optional*, defaults to 1e-12): The epsilon used by the layer normalization layers. - out_features (`List[str]`, *optional*): - If used as a backbone, list of feature names to output, e.g. `["stage1", "stage2"]`. + If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. + (depending on how many stages the model has). Defaults to the last stage in case of `None`. Example: diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py index 60410a36210d..34790d5f2d97 100644 --- a/src/transformers/models/maskformer/modeling_maskformer_swin.py +++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py @@ -855,7 +855,7 @@ def __init__(self, config: MaskFormerSwinConfig): self.stage_names = config.stage_names self.model = MaskFormerSwinModel(config) - self.out_features = config.out_features + self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] if "stem" in self.out_features: raise ValueError("This backbone does not support 'stem' in the `out_features`.") diff --git a/src/transformers/models/resnet/configuration_resnet.py b/src/transformers/models/resnet/configuration_resnet.py index 2d0dbc3b0fdb..ced8ce41bd8d 100644 --- a/src/transformers/models/resnet/configuration_resnet.py +++ b/src/transformers/models/resnet/configuration_resnet.py @@ -59,8 +59,8 @@ class ResNetConfig(PretrainedConfig): downsample_in_first_stage (`bool`, *optional*, defaults to `False`): If `True`, the first stage will downsample the inputs using a `stride` of 2. out_features (`List[str]`, *optional*): - If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, - `"stage3"`, `"stage4"`. + If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. + (depending on how many stages the model has). Defaults to the last stage in case of `None`. Example: ```python From 507396cb029cd1103e505db10f560b3414d7d961 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Thu, 8 Dec 2022 14:48:55 +0100 Subject: [PATCH 4/5] Fix default stage --- src/transformers/models/bit/modeling_bit.py | 2 +- .../models/maskformer/configuration_maskformer_swin.py | 5 +++-- .../models/maskformer/modeling_maskformer_swin.py | 2 +- src/transformers/models/resnet/modeling_resnet.py | 2 +- tests/models/bit/test_modeling_bit.py | 5 +++++ tests/models/resnet/test_modeling_resnet.py | 5 +++++ 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index 31cc61e2a0c3..d40e7f4e8c02 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -851,7 +851,7 @@ def __init__(self, config): self.stage_names = config.stage_names self.bit = BitModel(config) - self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] + self.out_features = config.out_features if config.out_features is not None else [self.stage_names[-1]] out_feature_channels = {} out_feature_channels["stem"] = config.embedding_size diff --git a/src/transformers/models/maskformer/configuration_maskformer_swin.py b/src/transformers/models/maskformer/configuration_maskformer_swin.py index b9c72fbf3380..c22f46b6e949 100644 --- a/src/transformers/models/maskformer/configuration_maskformer_swin.py +++ b/src/transformers/models/maskformer/configuration_maskformer_swin.py @@ -68,8 +68,9 @@ class MaskFormerSwinConfig(PretrainedConfig): The standard deviation of the truncated_normal_initializer for initializing all weight matrices. layer_norm_eps (`float`, *optional*, defaults to 1e-12): The epsilon used by the layer normalization layers. - If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. - (depending on how many stages the model has). Defaults to the last stage in case of `None`. + out_features (`List[str]`, *optional*): + If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. + (depending on how many stages the model has). Defaults to the last stage in case of `None`. Example: diff --git a/src/transformers/models/maskformer/modeling_maskformer_swin.py b/src/transformers/models/maskformer/modeling_maskformer_swin.py index 34790d5f2d97..09bdd817dafd 100644 --- a/src/transformers/models/maskformer/modeling_maskformer_swin.py +++ b/src/transformers/models/maskformer/modeling_maskformer_swin.py @@ -855,7 +855,7 @@ def __init__(self, config: MaskFormerSwinConfig): self.stage_names = config.stage_names self.model = MaskFormerSwinModel(config) - self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] + self.out_features = config.out_features if config.out_features is not None else [self.stage_names[-1]] if "stem" in self.out_features: raise ValueError("This backbone does not support 'stem' in the `out_features`.") diff --git a/src/transformers/models/resnet/modeling_resnet.py b/src/transformers/models/resnet/modeling_resnet.py index 2696c56cf25c..9efedd1faa9c 100644 --- a/src/transformers/models/resnet/modeling_resnet.py +++ b/src/transformers/models/resnet/modeling_resnet.py @@ -439,7 +439,7 @@ def __init__(self, config): self.embedder = ResNetEmbeddings(config) self.encoder = ResNetEncoder(config) - self.out_features = config.out_features if config.out_features is not None else self.stage_names[-1] + self.out_features = config.out_features if config.out_features is not None else [self.stage_names[-1]] out_feature_channels = {} out_feature_channels["stem"] = config.embedding_size diff --git a/tests/models/bit/test_modeling_bit.py b/tests/models/bit/test_modeling_bit.py index 34c200858531..7b7e07cb8fb6 100644 --- a/tests/models/bit/test_modeling_bit.py +++ b/tests/models/bit/test_modeling_bit.py @@ -134,9 +134,14 @@ def create_and_check_backbone(self, config, pixel_values, labels): model.eval() result = model(pixel_values) + # verify feature maps self.parent.assertEqual(len(result.feature_maps), 1) self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[-1], 1, 1]) + # verify channels + self.parent.assertEqual(len(model.channels), 1) + self.parent.assertListEqual(model.channels, [config.hidden_sizes[-1]]) + def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() config, pixel_values, labels = config_and_inputs diff --git a/tests/models/resnet/test_modeling_resnet.py b/tests/models/resnet/test_modeling_resnet.py index f9fca537a340..15d3dca3c53f 100644 --- a/tests/models/resnet/test_modeling_resnet.py +++ b/tests/models/resnet/test_modeling_resnet.py @@ -134,9 +134,14 @@ def create_and_check_backbone(self, config, pixel_values, labels): model.eval() result = model(pixel_values) + # verify feature maps self.parent.assertEqual(len(result.feature_maps), 1) self.parent.assertListEqual(list(result.feature_maps[0].shape), [self.batch_size, self.hidden_sizes[-1], 1, 1]) + # verify channels + self.parent.assertEqual(len(model.channels), 1) + self.parent.assertListEqual(model.channels, [config.hidden_sizes[-1]]) + def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() config, pixel_values, labels = config_and_inputs From 28c2c723363417500c47264c70e5923c70b809f6 Mon Sep 17 00:00:00 2001 From: Niels Rogge Date: Thu, 8 Dec 2022 15:31:44 +0100 Subject: [PATCH 5/5] Apply suggestions from code review --- src/transformers/models/bit/configuration_bit.py | 2 +- .../models/maskformer/configuration_maskformer_swin.py | 2 +- src/transformers/models/resnet/configuration_resnet.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/bit/configuration_bit.py b/src/transformers/models/bit/configuration_bit.py index ef0f6b7fdaa6..7c1e105107e3 100644 --- a/src/transformers/models/bit/configuration_bit.py +++ b/src/transformers/models/bit/configuration_bit.py @@ -63,7 +63,7 @@ class BitConfig(PretrainedConfig): The width factor for the model. out_features (`List[str]`, *optional*): If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. - (depending on how many stages the model has). Defaults to the last stage in case of `None`. + (depending on how many stages the model has). Will default to the last stage if unset. Example: ```python diff --git a/src/transformers/models/maskformer/configuration_maskformer_swin.py b/src/transformers/models/maskformer/configuration_maskformer_swin.py index c22f46b6e949..36e0746552c8 100644 --- a/src/transformers/models/maskformer/configuration_maskformer_swin.py +++ b/src/transformers/models/maskformer/configuration_maskformer_swin.py @@ -70,7 +70,7 @@ class MaskFormerSwinConfig(PretrainedConfig): The epsilon used by the layer normalization layers. out_features (`List[str]`, *optional*): If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. - (depending on how many stages the model has). Defaults to the last stage in case of `None`. + (depending on how many stages the model has). Will default to the last stage if unset. Example: diff --git a/src/transformers/models/resnet/configuration_resnet.py b/src/transformers/models/resnet/configuration_resnet.py index ced8ce41bd8d..74f6c6939722 100644 --- a/src/transformers/models/resnet/configuration_resnet.py +++ b/src/transformers/models/resnet/configuration_resnet.py @@ -60,7 +60,7 @@ class ResNetConfig(PretrainedConfig): If `True`, the first stage will downsample the inputs using a `stride` of 2. out_features (`List[str]`, *optional*): If used as backbone, list of features to output. Can be any of `"stem"`, `"stage1"`, `"stage2"`, etc. - (depending on how many stages the model has). Defaults to the last stage in case of `None`. + (depending on how many stages the model has). Will default to the last stage if unset. Example: ```python