From 0d0525e1a17181aca3e5c77b1eec446190e40867 Mon Sep 17 00:00:00 2001 From: Anahita Bhiwandiwalla Date: Wed, 8 Mar 2023 14:29:58 -0800 Subject: [PATCH 1/8] Use return_loss for BridgeTowerForContrastiveLearning, add example --- .../bridgetower/modeling_bridgetower.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index f405407d7d9b..e00f4a18b1cf 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -1789,12 +1789,11 @@ def forward( output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = True, return_dict: Optional[bool] = None, - labels: Optional[torch.LongTensor] = None, + return_loss: Optional[bool] = True, ) -> Union[BridgeTowerContrastiveOutput, Tuple[torch.FloatTensor]]: r""" - labels (`torch.LongTensor` of shape `(batch_size, 1)`, *optional*): - Labels for computing the image-text matching loss. 0 means the pairs don't match and 1 means they match. - The pairs with 0 will be skipped for calculation. + return_loss (`bool`, *optional*): + Whether or not to return the contrastive loss. Default is True. 
Returns: Examples: @@ -1803,14 +1802,25 @@ def forward( >>> from transformers import BridgeTowerProcessor, BridgeTowerForContrastiveLearning >>> import requests >>> from PIL import Image + >>> import torch - >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" - >>> image = Image.open(requests.get(url, stream=True).raw) - >>> texts = "An image of two cats chilling on a couch" + >>> image_urls = ["https://farm4.staticflickr.com/3395/3428278415_81c3e27f15_z.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg"] + >>> texts = ["two dogs in a car", "two cats sleeping on a couch"] + >>> images = [Image.open(requests.get(url, stream=True).raw) for url in image_urls] - >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") + >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm") >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") - >>> outputs = model(**inputs, output_hidden_states=True) + + >>> inputs = processor(images, texts, padding=True, return_tensors="pt") + >>> outputs = model(**inputs) + + >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt") + >>> outputs_swapped = model(**inputs) + + >>> print('Loss', outputs.loss.item()) + Loss 0.00191505195107311 + >>> print('Loss with swapped images', outputs_swapped.loss.item()) + Loss with swapped images 2.1259872913360596 ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1857,8 +1867,8 @@ def forward( itc_loss = None - if labels is not None: - labels = torch.arange(len(labels), device=logits.device) + if return_loss: + labels = torch.arange(len(logits), device=logits.device) text_to_image_loss = nn.functional.cross_entropy(logits_text_to_image, labels) text_to_cross_loss = nn.functional.cross_entropy(logits_text_to_cross, labels) image_to_cross_loss = 
nn.functional.cross_entropy(logits_image_to_cross, labels) From 280abc047167e18d5ee29a9e72609d9b0227186d Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Mon, 13 Mar 2023 08:29:02 -0700 Subject: [PATCH 2/8] fix tests --- .../bridgetower/modeling_bridgetower.py | 29 +++++++++++++------ .../bridgetower/test_modeling_bridgetower.py | 17 +++++++---- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index e00f4a18b1cf..8891ad06a121 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -166,6 +166,8 @@ class BridgeTowerContrastiveOutput(ModelOutput): Output type of ['BridgeTowerForContrastiveLearning'] Args: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): + Image-text contrastive loss. logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). text_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`): @@ -174,8 +176,6 @@ class BridgeTowerContrastiveOutput(ModelOutput): The image embeddings obtained by applying the projection layer to the pooler_output. cross_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`): The text-image cross-modal embeddings obtained by applying the projection layer to the pooler_output. - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): - Image-text contrastive loss. 
attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, sequence_length)`. @@ -185,11 +185,11 @@ class BridgeTowerContrastiveOutput(ModelOutput): the model at the output of each layer plus the optional initial embedding outputs. """ + loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None text_embeds: Optional[Tuple[torch.FloatTensor]] = None image_embeds: Optional[Tuple[torch.FloatTensor]] = None cross_embeds: Optional[Tuple[torch.FloatTensor]] = None - loss: Optional[torch.FloatTensor] = None attentions: Optional[Tuple[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None @@ -1804,22 +1804,26 @@ def forward( >>> from PIL import Image >>> import torch - >>> image_urls = ["https://farm4.staticflickr.com/3395/3428278415_81c3e27f15_z.jpg", "http://images.cocodataset.org/val2017/000000039769.jpg"] + >>> image_urls = [ + ... "https://farm4.staticflickr.com/3395/3428278415_81c3e27f15_z.jpg", + ... "http://images.cocodataset.org/val2017/000000039769.jpg", + ... 
] >>> texts = ["two dogs in a car", "two cats sleeping on a couch"] >>> images = [Image.open(requests.get(url, stream=True).raw) for url in image_urls] >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm") >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") - >>> inputs = processor(images, texts, padding=True, return_tensors="pt") + >>> inputs = processor(images, texts, padding=True, return_tensors="pt") >>> outputs = model(**inputs) - >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt") + >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt") >>> outputs_swapped = model(**inputs) - >>> print('Loss', outputs.loss.item()) + >>> print("Loss", outputs.loss.item()) Loss 0.00191505195107311 - >>> print('Loss with swapped images', outputs_swapped.loss.item()) + + >>> print("Loss with swapped images", outputs_swapped.loss.item()) Loss with swapped images 2.1259872913360596 ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1875,7 +1879,14 @@ def forward( itc_loss = (text_to_image_loss + text_to_cross_loss + image_to_cross_loss) / 3.0 if not return_dict: - output = tuple(logits) + output = (logits, text_embeds, image_embeds, cross_embeds) + if output_attentions: + output = output + (outputs[4],) + if output_hidden_states: + output = output + (outputs[3],) + elif output_hidden_states: + output = output + (outputs[3],) + return ((itc_loss,) + output) if itc_loss is not None else output return BridgeTowerContrastiveOutput( diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index 20396c8bf7bf..8205f2fab843 100644 --- a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -94,7 +94,7 @@ def __init__( self.num_hidden_layers = num_hidden_layers 
self.tie_word_embeddings = tie_word_embeddings self.init_layernorm_from_vision_encoder = init_layernorm_from_vision_encoder - self.vocab_size = 50265 + self.vocab_size = 99 self.num_channels = 3 self.seq_length = 4 self.num_image_features = 325 @@ -115,6 +115,8 @@ def prepare_config_and_inputs(self): return (config, input_ids, attention_mask, pixel_values, pixel_mask) def get_config(self): + text_config = {"vocab_size": self.vocab_size} + return BridgeTowerConfig( share_cross_modal_transformer_layers=self.share_cross_modal_transformer_layers, drop_rate=self.drop_rate, @@ -135,6 +137,7 @@ def get_config(self): output_hidden_states=self.output_hidden_states, contrastive_hidden_size=self.contrastive_hidden_size, logit_scale_init_value=self.logit_scale_init_value, + text_config=text_config, ) def create_and_check_model( @@ -231,7 +234,7 @@ def extract_output(self, outputs, model_class): def setUp(self): self.model_tester = BridgeTowerModelTester(self) - self.config_tester = ConfigTester(self, config_class=BridgeTowerConfig, hidden_size=37, vocab_size=50265) + self.config_tester = ConfigTester(self, config_class=BridgeTowerConfig, hidden_size=37, vocab_size=99) def test_config(self): self.config_tester.run_common_tests() @@ -483,10 +486,10 @@ def test_constrastive_learning(self): torch_device ) model.eval() - processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") + processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm") image = prepare_img() text = "a bunch of cats laying on a tower." 
- inputs = processor(image, text, return_tensors="pt").to(torch_device) + inputs = processor(image, text, padding=True, return_tensors="pt").to(torch_device) with torch.no_grad(): outputs = model(**inputs, output_hidden_states=True) @@ -507,14 +510,16 @@ class BridgeTowerModelTrainingTest(unittest.TestCase): def setUp(self): self.model_tester = BridgeTowerModelTester(self) - self.config_tester = ConfigTester(self, config_class=BridgeTowerConfig, hidden_size=37, vocab_size=50265) + self.config_tester = ConfigTester(self, config_class=BridgeTowerConfig, hidden_size=37, vocab_size=99) def _prepare_inputs_for_training(self, model_class): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() if model_class == BridgeTowerForMaskedLM: inputs_dict["labels"] = inputs_dict["input_ids"] - elif model_class == BridgeTowerForImageAndTextRetrieval or model_class == BridgeTowerForContrastiveLearning: + elif model_class == BridgeTowerForImageAndTextRetrieval: inputs_dict["labels"] = ids_tensor([1], 2) + elif model_class == BridgeTowerForContrastiveLearning: + inputs_dict["return_loss"] = True return config, inputs_dict def _get_non_used_layer_names(self, model_class): From 5d54c37e496095581bae4ee7534a9e7068b418a0 Mon Sep 17 00:00:00 2001 From: Anahita Bhiwandiwalla Date: Mon, 13 Mar 2023 16:31:16 -0700 Subject: [PATCH 3/8] Update example in BridgeTowerForContrastiveLearning --- src/transformers/models/bridgetower/modeling_bridgetower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 8891ad06a121..60b8a86520b1 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -1811,7 +1811,7 @@ def forward( >>> texts = ["two dogs in a car", "two cats sleeping on a couch"] >>> images = [Image.open(requests.get(url, stream=True).raw) for url 
in image_urls] - >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm") + >>> processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") >>> inputs = processor(images, texts, padding=True, return_tensors="pt") From 5e60f56ac93c0b0bcc4fb5ea39b483da2a9d48db Mon Sep 17 00:00:00 2001 From: Anahita Bhiwandiwalla Date: Mon, 13 Mar 2023 16:41:15 -0700 Subject: [PATCH 4/8] Update test_modeling_bridgetower.py --- tests/models/bridgetower/test_modeling_bridgetower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index 8205f2fab843..1aa9b23fbb82 100644 --- a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -486,7 +486,7 @@ def test_constrastive_learning(self): torch_device ) model.eval() - processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm") + processor = BridgeTowerProcessor.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") image = prepare_img() text = "a bunch of cats laying on a tower." 
inputs = processor(image, text, padding=True, return_tensors="pt").to(torch_device) From 38e4bb7eac9e02a542c48358e7271389095fc8d2 Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Tue, 14 Mar 2023 17:40:37 -0700 Subject: [PATCH 5/8] update model output format --- .../bridgetower/modeling_bridgetower.py | 43 ++++++++----------- .../bridgetower/test_modeling_bridgetower.py | 2 +- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 60b8a86520b1..124ff12a6236 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -166,7 +166,7 @@ class BridgeTowerContrastiveOutput(ModelOutput): Output type of ['BridgeTowerForContrastiveLearning'] Args: - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss=True`: Image-text contrastive loss. logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). @@ -176,13 +176,13 @@ class BridgeTowerContrastiveOutput(ModelOutput): The image embeddings obtained by applying the projection layer to the pooler_output. cross_embeds (`torch.FloatTensor)`, *optional*, returned when model is initialized with `with_projection=True`): The text-image cross-modal embeddings obtained by applying the projection layer to the pooler_output. - attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): - Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, - sequence_length)`. 
hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. + attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): + Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, + sequence_length)`. """ loss: Optional[torch.FloatTensor] = None @@ -190,8 +190,8 @@ class BridgeTowerContrastiveOutput(ModelOutput): text_embeds: Optional[Tuple[torch.FloatTensor]] = None image_embeds: Optional[Tuple[torch.FloatTensor]] = None cross_embeds: Optional[Tuple[torch.FloatTensor]] = None - attentions: Optional[Tuple[torch.FloatTensor]] = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None + attentions: Optional[Tuple[torch.FloatTensor]] = None class BridgeTowerResidualAttention(nn.Module): @@ -1789,11 +1789,11 @@ def forward( output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = True, return_dict: Optional[bool] = None, - return_loss: Optional[bool] = True, + return_loss: Optional[bool] = None, ) -> Union[BridgeTowerContrastiveOutput, Tuple[torch.FloatTensor]]: r""" return_loss (`bool`, *optional*): - Whether or not to return the contrastive loss. Default is True. + Whether or not to return the contrastive loss. 
Returns: Examples: @@ -1815,16 +1815,16 @@ def forward( >>> model = BridgeTowerForContrastiveLearning.from_pretrained("BridgeTower/bridgetower-large-itm-mlm-itc") >>> inputs = processor(images, texts, padding=True, return_tensors="pt") - >>> outputs = model(**inputs) + >>> loss = model(**inputs, return_loss=True).loss >>> inputs = processor(images, texts[::-1], padding=True, return_tensors="pt") - >>> outputs_swapped = model(**inputs) + >>> loss_swapped = model(**inputs, return_loss=True).loss - >>> print("Loss", outputs.loss.item()) - Loss 0.00191505195107311 + >>> print("Loss", round(loss.item(), 4)) + Loss 0.0019 - >>> print("Loss with swapped images", outputs_swapped.loss.item()) - Loss with swapped images 2.1259872913360596 + >>> print("Loss with swapped images", round(loss_swapped.item(), 4)) + Loss with swapped images 2.126 ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1879,22 +1879,15 @@ def forward( itc_loss = (text_to_image_loss + text_to_cross_loss + image_to_cross_loss) / 3.0 if not return_dict: - output = (logits, text_embeds, image_embeds, cross_embeds) - if output_attentions: - output = output + (outputs[4],) - if output_hidden_states: - output = output + (outputs[3],) - elif output_hidden_states: - output = output + (outputs[3],) - + output = (logits, text_embeds, image_embeds, cross_embeds) + outputs[3:] return ((itc_loss,) + output) if itc_loss is not None else output return BridgeTowerContrastiveOutput( - attentions=outputs.attentions, - hidden_states=outputs.hidden_states, + loss=itc_loss, + logits=logits, text_embeds=text_embeds, image_embeds=image_embeds, cross_embeds=cross_embeds, - logits=logits, - loss=itc_loss, + attentions=outputs.attentions, + hidden_states=outputs.hidden_states, ) diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index 1aa9b23fbb82..c83bdf7da80c 100644 --- 
a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -491,7 +491,7 @@ def test_constrastive_learning(self): text = "a bunch of cats laying on a tower." inputs = processor(image, text, padding=True, return_tensors="pt").to(torch_device) with torch.no_grad(): - outputs = model(**inputs, output_hidden_states=True) + outputs = model(**inputs, output_hidden_states=True, return_loss=True) # verify the logits expected_shape = torch.Size([1, 3, 512]) From b2471ee37b99914a2dfb987e27f94e1abf3ba14d Mon Sep 17 00:00:00 2001 From: Tiep Le Date: Wed, 15 Mar 2023 09:14:20 -0700 Subject: [PATCH 6/8] minor update --- src/transformers/models/bridgetower/modeling_bridgetower.py | 2 +- tests/models/bridgetower/test_modeling_bridgetower.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 124ff12a6236..fb57a6d56ee0 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -166,7 +166,7 @@ class BridgeTowerContrastiveOutput(ModelOutput): Output type of ['BridgeTowerForContrastiveLearning'] Args: - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss=True`: + loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `return_loss` is `True`): Image-text contrastive loss. logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
diff --git a/tests/models/bridgetower/test_modeling_bridgetower.py b/tests/models/bridgetower/test_modeling_bridgetower.py index c83bdf7da80c..078d66b51645 100644 --- a/tests/models/bridgetower/test_modeling_bridgetower.py +++ b/tests/models/bridgetower/test_modeling_bridgetower.py @@ -115,8 +115,6 @@ def prepare_config_and_inputs(self): return (config, input_ids, attention_mask, pixel_values, pixel_mask) def get_config(self): - text_config = {"vocab_size": self.vocab_size} - return BridgeTowerConfig( share_cross_modal_transformer_layers=self.share_cross_modal_transformer_layers, drop_rate=self.drop_rate, @@ -137,7 +135,6 @@ def get_config(self): output_hidden_states=self.output_hidden_states, contrastive_hidden_size=self.contrastive_hidden_size, logit_scale_init_value=self.logit_scale_init_value, - text_config=text_config, ) def create_and_check_model( @@ -191,7 +188,7 @@ def create_and_check_for_masked_language_modeling( result = model(input_ids, attention_mask=attention_mask, pixel_values=pixel_values, pixel_mask=pixel_mask) result = model(input_ids, attention_mask=attention_mask, pixel_values=pixel_values) - self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size)) + self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, 50265)) def prepare_config_and_inputs_for_common(self): config_and_inputs = self.prepare_config_and_inputs() From dff0fe1077e7a77528ec328c9413e84105c574cf Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 15 Mar 2023 18:27:55 +0100 Subject: [PATCH 7/8] Update src/transformers/models/bridgetower/modeling_bridgetower.py --- src/transformers/models/bridgetower/modeling_bridgetower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index fb57a6d56ee0..26e392f04f14 100644 --- 
a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -1888,6 +1888,6 @@ def forward( text_embeds=text_embeds, image_embeds=image_embeds, cross_embeds=cross_embeds, - attentions=outputs.attentions, hidden_states=outputs.hidden_states, + attentions=outputs.attentions, ) From 07509f46868989779deeb5515a4f74890d1c3e2d Mon Sep 17 00:00:00 2001 From: ydshieh Date: Wed, 15 Mar 2023 19:06:49 +0100 Subject: [PATCH 8/8] make style --- src/transformers/models/bridgetower/modeling_bridgetower.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/bridgetower/modeling_bridgetower.py b/src/transformers/models/bridgetower/modeling_bridgetower.py index 26e392f04f14..209ff9703ff2 100644 --- a/src/transformers/models/bridgetower/modeling_bridgetower.py +++ b/src/transformers/models/bridgetower/modeling_bridgetower.py @@ -1889,5 +1889,5 @@ def forward( image_embeds=image_embeds, cross_embeds=cross_embeds, hidden_states=outputs.hidden_states, - attentions=outputs.attentions, + attentions=outputs.attentions, )