Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 10 additions & 89 deletions tests/models/blip_2/test_processor_blip_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import shutil

import tempfile
import unittest

import numpy as np
import pytest

from transformers import Blip2Processor, BlipImageProcessor, GPT2Tokenizer, GPT2TokenizerFast
from transformers.testing_utils import require_vision
from transformers.utils import is_vision_available


if is_vision_available():
from PIL import Image

from transformers import AutoProcessor, Blip2Processor, BlipImageProcessor, GPT2Tokenizer, PreTrainedTokenizerFast
from ...test_processing_common import ProcessorTesterMixin


@require_vision
class Blip2ProcessorTest(unittest.TestCase):
class Blip2ProcessorTest(ProcessorTesterMixin, unittest.TestCase):
tokenizer_class = GPT2Tokenizer
fast_tokenizer_class = GPT2TokenizerFast
image_processor_class = BlipImageProcessor
processor_class = Blip2Processor

def setUp(self):
self.tmpdirname = tempfile.mkdtemp()

Expand All @@ -40,77 +40,11 @@ def setUp(self):

processor.save_pretrained(self.tmpdirname)

def get_tokenizer(self, **kwargs):
    """Load the processor stored in ``self.tmpdirname`` and return its tokenizer.

    Args:
        **kwargs: Forwarded unchanged to ``AutoProcessor.from_pretrained``.
    """
    loaded_processor = AutoProcessor.from_pretrained(self.tmpdirname, **kwargs)
    return loaded_processor.tokenizer

def get_image_processor(self, **kwargs):
    """Load the processor stored in ``self.tmpdirname`` and return its image processor.

    Args:
        **kwargs: Forwarded unchanged to ``AutoProcessor.from_pretrained``.
    """
    loaded_processor = AutoProcessor.from_pretrained(self.tmpdirname, **kwargs)
    return loaded_processor.image_processor

def tearDown(self):
    """Delete the temporary directory referenced by ``self.tmpdirname``."""
    scratch_dir = self.tmpdirname
    shutil.rmtree(scratch_dir)

def prepare_image_inputs(self):
    """Return a one-element list containing a randomly generated PIL image.

    A ``uint8`` array of shape ``(3, 30, 400)`` is drawn with
    ``np.random.randint(255, ...)``, its first axis is moved to the last
    position, and the result is wrapped with ``Image.fromarray``.
    """
    channels_first = np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)
    # Move axis 0 to the end before handing the array to PIL.
    channels_last = np.moveaxis(channels_first, 0, -1)
    return [Image.fromarray(channels_last)]

def test_save_load_pretrained_additional_features(self):
    """Check that extra kwargs given to ``from_pretrained`` reach both sub-components.

    The processor is saved, then reloaded with tokenizer and image-processor
    overrides; the reloaded parts are compared with components loaded
    separately using the same overrides.
    """
    tokenizer_overrides = {"bos_token": "(BOS)", "eos_token": "(EOS)"}
    image_processor_overrides = {"do_normalize": False, "padding_value": 1.0}

    base_tokenizer = self.get_tokenizer()
    base_image_processor = self.get_image_processor()
    saved = Blip2Processor(tokenizer=base_tokenizer, image_processor=base_image_processor)
    saved.save_pretrained(self.tmpdirname)

    # Reference components, each loaded with only its own overrides.
    expected_tokenizer = self.get_tokenizer(**tokenizer_overrides)
    expected_image_processor = self.get_image_processor(**image_processor_overrides)

    reloaded = Blip2Processor.from_pretrained(
        self.tmpdirname, **tokenizer_overrides, **image_processor_overrides
    )

    self.assertEqual(reloaded.tokenizer.get_vocab(), expected_tokenizer.get_vocab())
    self.assertIsInstance(reloaded.tokenizer, PreTrainedTokenizerFast)

    self.assertEqual(
        reloaded.image_processor.to_json_string(), expected_image_processor.to_json_string()
    )
    self.assertIsInstance(reloaded.image_processor, BlipImageProcessor)

def test_image_processor(self):
    """Check that image-only processor output matches the bare image processor.

    For every key returned by the image processor, the summed values must agree
    with the processor's output within ``delta=1e-2``.
    """
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = Blip2Processor(tokenizer=tokenizer, image_processor=image_processor)

    images = self.prepare_image_inputs()
    expected_features = image_processor(images, return_tensors="np")
    actual_features = combined(images=images, return_tensors="np")

    for name, expected in expected_features.items():
        self.assertAlmostEqual(expected.sum(), actual_features[name].sum(), delta=1e-2)

def test_tokenizer(self):
    """Check that text-only processor output matches the bare tokenizer.

    The tokenizer is called with ``return_token_type_ids=False``; each key it
    returns must equal the processor's value for that key.
    """
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = Blip2Processor(tokenizer=tokenizer, image_processor=image_processor)

    text = "lower newer"
    processor_output = combined(text=text)
    tokenizer_output = tokenizer(text, return_token_type_ids=False)

    for name, expected in tokenizer_output.items():
        self.assertListEqual(expected, processor_output[name])

def test_processor(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()

processor = Blip2Processor(tokenizer=tokenizer, image_processor=image_processor)
processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)

input_str = "lower newer"
image_input = self.prepare_image_inputs()
Expand All @@ -123,19 +57,6 @@ def test_processor(self):
with pytest.raises(ValueError):
processor()

def test_tokenizer_decode(self):
    """Check that ``processor.batch_decode`` returns the same as ``tokenizer.batch_decode``."""
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = Blip2Processor(tokenizer=tokenizer, image_processor=image_processor)

    # Two fixed id sequences used as stand-in model predictions.
    token_id_batches = [
        [1, 4, 5, 8, 1, 0, 8],
        [3, 4, 3, 1, 1, 8, 9],
    ]

    via_processor = combined.batch_decode(token_id_batches)
    via_tokenizer = tokenizer.batch_decode(token_id_batches)

    self.assertListEqual(via_tokenizer, via_processor)

def test_model_input_names(self):
image_processor = self.get_image_processor()
tokenizer = self.get_tokenizer()
Expand Down
157 changes: 9 additions & 148 deletions tests/models/clip/test_processor_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,24 @@

import json
import os
import shutil
import tempfile
import unittest

import numpy as np
import pytest

from transformers import CLIPTokenizer, CLIPTokenizerFast
from transformers import CLIPImageProcessor, CLIPProcessor, CLIPTokenizer, CLIPTokenizerFast
from transformers.models.clip.tokenization_clip import VOCAB_FILES_NAMES
from transformers.testing_utils import require_vision
from transformers.utils import IMAGE_PROCESSOR_NAME, is_vision_available

from transformers.utils import IMAGE_PROCESSOR_NAME

if is_vision_available():
from PIL import Image

from transformers import CLIPImageProcessor, CLIPProcessor
from ...test_processing_common import ProcessorTesterMixin


@require_vision
class CLIPProcessorTest(unittest.TestCase):
class CLIPProcessorTest(ProcessorTesterMixin, unittest.TestCase):
tokenizer_class = CLIPTokenizer
fast_tokenizer_class = CLIPTokenizerFast
image_processor_class = CLIPImageProcessor
processor_class = CLIPProcessor

def setUp(self):
self.tmpdirname = tempfile.mkdtemp()

Expand Down Expand Up @@ -62,139 +59,3 @@ def setUp(self):
self.image_processor_file = os.path.join(self.tmpdirname, IMAGE_PROCESSOR_NAME)
with open(self.image_processor_file, "w", encoding="utf-8") as fp:
json.dump(image_processor_map, fp)

def get_tokenizer(self, **kwargs):
    """Load a ``CLIPTokenizer`` from ``self.tmpdirname``.

    Args:
        **kwargs: Forwarded unchanged to ``CLIPTokenizer.from_pretrained``.
    """
    slow_tokenizer = CLIPTokenizer.from_pretrained(self.tmpdirname, **kwargs)
    return slow_tokenizer

def get_rust_tokenizer(self, **kwargs):
    """Load a ``CLIPTokenizerFast`` from ``self.tmpdirname``.

    Args:
        **kwargs: Forwarded unchanged to ``CLIPTokenizerFast.from_pretrained``.
    """
    fast_tokenizer = CLIPTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
    return fast_tokenizer

def get_image_processor(self, **kwargs):
    """Load a ``CLIPImageProcessor`` from ``self.tmpdirname``.

    Args:
        **kwargs: Forwarded unchanged to ``CLIPImageProcessor.from_pretrained``.
    """
    loaded_image_processor = CLIPImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
    return loaded_image_processor

def tearDown(self):
    """Delete the temporary directory referenced by ``self.tmpdirname``."""
    scratch_dir = self.tmpdirname
    shutil.rmtree(scratch_dir)

def prepare_image_inputs(self):
    """Return a one-element list containing a randomly generated PIL image.

    A ``uint8`` array of shape ``(3, 30, 400)`` is drawn with
    ``np.random.randint(255, ...)``, its first axis is moved to the last
    position, and the result is wrapped with ``Image.fromarray``.
    """
    raw_pixels = np.random.randint(255, size=(3, 30, 400), dtype=np.uint8)
    # Move axis 0 to the end before handing the array to PIL.
    pil_ready = np.moveaxis(raw_pixels, 0, -1)
    return [Image.fromarray(pil_ready)]

def test_save_load_pretrained_default(self):
    """Round-trip the processor through ``save_pretrained`` / ``from_pretrained``.

    Done once with the slow tokenizer (reloaded with ``use_fast=False``) and once
    with the fast tokenizer (reloaded with default arguments). Both reloads must
    keep the vocabulary, the tokenizer class and the image-processor config.
    """
    slow_tok = self.get_tokenizer()
    fast_tok = self.get_rust_tokenizer()
    image_proc = self.get_image_processor()

    # Slow round trip.
    CLIPProcessor(tokenizer=slow_tok, image_processor=image_proc).save_pretrained(self.tmpdirname)
    reloaded_slow = CLIPProcessor.from_pretrained(self.tmpdirname, use_fast=False)

    # Fast round trip, saved into the same directory.
    CLIPProcessor(tokenizer=fast_tok, image_processor=image_proc).save_pretrained(self.tmpdirname)
    reloaded_fast = CLIPProcessor.from_pretrained(self.tmpdirname)

    self.assertEqual(reloaded_slow.tokenizer.get_vocab(), slow_tok.get_vocab())
    self.assertEqual(reloaded_fast.tokenizer.get_vocab(), fast_tok.get_vocab())
    self.assertEqual(slow_tok.get_vocab(), fast_tok.get_vocab())
    self.assertIsInstance(reloaded_slow.tokenizer, CLIPTokenizer)
    self.assertIsInstance(reloaded_fast.tokenizer, CLIPTokenizerFast)

    expected_config = image_proc.to_json_string
    self.assertEqual(reloaded_slow.image_processor.to_json_string(), expected_config())
    self.assertEqual(reloaded_fast.image_processor.to_json_string(), expected_config())
    self.assertIsInstance(reloaded_slow.image_processor, CLIPImageProcessor)
    self.assertIsInstance(reloaded_fast.image_processor, CLIPImageProcessor)

def test_save_load_pretrained_additional_features(self):
    """Check that extra kwargs given to ``from_pretrained`` reach both sub-components.

    The processor is saved, then reloaded with tokenizer and image-processor
    overrides; the reloaded parts are compared with components loaded
    separately using the same overrides.
    """
    tokenizer_overrides = {"bos_token": "(BOS)", "eos_token": "(EOS)"}
    image_processor_overrides = {"do_normalize": False, "padding_value": 1.0}

    base_tokenizer = self.get_tokenizer()
    base_image_processor = self.get_image_processor()
    saved = CLIPProcessor(tokenizer=base_tokenizer, image_processor=base_image_processor)
    saved.save_pretrained(self.tmpdirname)

    # Reference components, each loaded with only its own overrides.
    expected_tokenizer = self.get_tokenizer(**tokenizer_overrides)
    expected_image_processor = self.get_image_processor(**image_processor_overrides)

    reloaded = CLIPProcessor.from_pretrained(
        self.tmpdirname, **tokenizer_overrides, **image_processor_overrides
    )

    self.assertEqual(reloaded.tokenizer.get_vocab(), expected_tokenizer.get_vocab())
    self.assertIsInstance(reloaded.tokenizer, CLIPTokenizerFast)

    self.assertEqual(
        reloaded.image_processor.to_json_string(), expected_image_processor.to_json_string()
    )
    self.assertIsInstance(reloaded.image_processor, CLIPImageProcessor)

def test_image_processor(self):
    """Check that image-only processor output matches the bare image processor.

    For every key returned by the image processor, the summed values must agree
    with the processor's output within ``delta=1e-2``.
    """
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = CLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)

    images = self.prepare_image_inputs()
    expected_features = image_processor(images, return_tensors="np")
    actual_features = combined(images=images, return_tensors="np")

    for name, expected in expected_features.items():
        self.assertAlmostEqual(expected.sum(), actual_features[name].sum(), delta=1e-2)

def test_tokenizer(self):
    """Check that text-only processor output matches the bare tokenizer.

    Each key returned by the tokenizer must equal the processor's value for
    that key.
    """
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = CLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)

    text = "lower newer"
    processor_output = combined(text=text)
    tokenizer_output = tokenizer(text)

    for name, expected in tokenizer_output.items():
        self.assertListEqual(expected, processor_output[name])

def test_processor(self):
    """Check the processor's output keys for text+image input, and its empty-call error.

    With both text and images, the returned keys must be exactly
    ``input_ids``, ``attention_mask``, ``pixel_values`` in that order. Calling
    the processor with no arguments must raise ``ValueError``.
    """
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = CLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)

    text = "lower newer"
    images = self.prepare_image_inputs()
    outputs = combined(text=text, images=images)

    expected_keys = ["input_ids", "attention_mask", "pixel_values"]
    self.assertListEqual(list(outputs.keys()), expected_keys)

    # test if it raises when no input is passed
    with pytest.raises(ValueError):
        combined()

def test_tokenizer_decode(self):
    """Check that ``processor.batch_decode`` returns the same as ``tokenizer.batch_decode``."""
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = CLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)

    # Two fixed id sequences used as stand-in model predictions.
    token_id_batches = [
        [1, 4, 5, 8, 1, 0, 8],
        [3, 4, 3, 1, 1, 8, 9],
    ]

    via_processor = combined.batch_decode(token_id_batches)
    via_tokenizer = tokenizer.batch_decode(token_id_batches)

    self.assertListEqual(via_tokenizer, via_processor)

def test_model_input_names(self):
    """Check that output keys for text+image input equal ``processor.model_input_names``."""
    image_processor = self.get_image_processor()
    tokenizer = self.get_tokenizer()
    combined = CLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)

    text = "lower newer"
    images = self.prepare_image_inputs()
    outputs = combined(text=text, images=images)

    produced_keys = list(outputs.keys())
    self.assertListEqual(produced_keys, combined.model_input_names)
Loading