From 906b9f69c6e49f51912a10046631f5f982bf802e Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Fri, 20 Jan 2023 16:35:53 +0000
Subject: [PATCH 1/2] fix `blip` doctest

---
 src/transformers/models/blip/modeling_blip.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py
index f00c9f9cabbb..872917a102eb 100644
--- a/src/transformers/models/blip/modeling_blip.py
+++ b/src/transformers/models/blip/modeling_blip.py
@@ -1176,17 +1176,28 @@ def forward(
         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)

-        >>> text = "How many cats are in the picture?"
+        >>> # training
+        >>> text = "How many cats are in the picture?"
+        >>> label = "2"
         >>> inputs = processor(images=image, text=text, return_tensors="pt")
+        >>> labels = processor(text=label, return_tensors="pt").input_ids
+        >>> inputs["labels"] = labels
         >>> outputs = model(**inputs)
+
+        >>> # inference
+        >>> text = "How many cats are in the picture?"
+        >>> inputs = processor(images=image, text=text, return_tensors="pt")
+        >>> outputs = model.generate(**inputs)
+        >>> print(processor.decode(outputs[0], skip_special_tokens=True))
+        2
         ```"""

         if labels is None and decoder_input_ids is None:
             raise ValueError(
                 "Either `decoder_input_ids` or `labels` should be passed when calling `forward` with"
                 " `BlipForQuestionAnswering`. if you are training the model make sure that `labels` is passed, if you"
-                " are using the model for inference make sure that `decoder_input_ids` is passed."
+                " are using the model for inference make sure that `decoder_input_ids` is passed or call `generate`"
             )

         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
@@ -1392,8 +1403,8 @@ def forward(
         >>> import requests
         >>> from transformers import BlipProcessor, BlipForImageTextRetrieval

-        >>> model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base")
-        >>> processor = BlipProcessor.from_pretrained("Salesforce/blip-itm-base")
+        >>> model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco")
+        >>> processor = BlipProcessor.from_pretrained("Salesforce/blip-itm-base-coco")

         >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
         >>> image = Image.open(requests.get(url, stream=True).raw)

From 213300143db61aa32e170071c01d58cdf2681b74 Mon Sep 17 00:00:00 2001
From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
Date: Mon, 23 Jan 2023 10:54:30 +0100
Subject: [PATCH 2/2] Update src/transformers/models/blip/modeling_blip.py

Co-authored-by: NielsRogge <48327001+NielsRogge@users.noreply.github.com>
---
 src/transformers/models/blip/modeling_blip.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/transformers/models/blip/modeling_blip.py b/src/transformers/models/blip/modeling_blip.py
index 872917a102eb..bdef8c442e7b 100644
--- a/src/transformers/models/blip/modeling_blip.py
+++ b/src/transformers/models/blip/modeling_blip.py
@@ -1185,6 +1185,8 @@ def forward(
         >>> inputs["labels"] = labels
         >>> outputs = model(**inputs)
+        >>> loss = outputs.loss
+        >>> loss.backward()

         >>> # inference
         >>> text = "How many cats are in the picture?"
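
For context, here is a minimal standalone sketch of the usage pattern the updated doctest documents: a training-style forward pass with `labels` (returning a loss) and inference via `generate`. The `Salesforce/blip-vqa-base` checkpoint name and the import/setup boilerplate are assumptions not shown in the hunks above; only the training/inference flow mirrors the docstring changes.

```python
# Sketch of the BLIP VQA usage pattern from the fixed doctest.
# Checkpoint name below is assumed; it is not part of the patch hunks.
import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForQuestionAnswering

processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")  # assumed checkpoint
model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
text = "How many cats are in the picture?"

# Training: pass `labels` so `forward` computes a loss we can backpropagate.
inputs = processor(images=image, text=text, return_tensors="pt")
inputs["labels"] = processor(text="2", return_tensors="pt").input_ids
outputs = model(**inputs)
outputs.loss.backward()

# Inference: call `generate` (no `labels` or `decoder_input_ids` needed),
# then decode the generated answer tokens.
inputs = processor(images=image, text=text, return_tensors="pt")
with torch.no_grad():
    generated = model.generate(**inputs)
print(processor.decode(generated[0], skip_special_tokens=True))
```

This matches the motivation for the reworded `ValueError`: `forward` needs either `labels` (training) or `decoder_input_ids`, while plain inference should go through `generate`.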