@@ -621,6 +621,7 @@ def preprocess(
"""
do_resize = do_resize if do_resize is not None else self.do_resize
size = size if size is not None else self.size
size = get_size_dict(size, default_to_square=False)
image_grid_pinpoints = image_grid_pinpoints if image_grid_pinpoints is not None else self.image_grid_pinpoints
resample = resample if resample is not None else self.resample
do_rescale = do_rescale if do_rescale is not None else self.do_rescale
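
The get_size_dict call added above normalizes whatever was passed for size into the canonical dict form before resizing. A minimal sketch of the assumed behavior, with illustrative values only:

# Minimal sketch, assuming the usual normalization done by get_size_dict
# in transformers.image_processing_utils; the sizes below are illustrative.
from transformers.image_processing_utils import get_size_dict

# A dict that is already in canonical form passes through unchanged.
print(get_size_dict({"shortest_edge": 384}, default_to_square=False))  # {'shortest_edge': 384}

# A bare int is normalized; with default_to_square=False it is interpreted as the
# shortest edge instead of being expanded to a square {"height", "width"} pair.
print(get_size_dict(384, default_to_square=False))  # {'shortest_edge': 384}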
@@ -17,6 +17,7 @@
"""

import math
import os
import sys
from typing import Iterable, List, Union

@@ -34,6 +35,11 @@
ProcessorMixin,
)
from ...tokenization_utils_base import PreTokenizedInput, TextInput
from ...utils import logging
from ..auto import AutoImageProcessor


logger = logging.get_logger(__name__)


class LlavaOnevisionProcessorKwargs(ProcessingKwargs, total=False):
@@ -96,7 +102,7 @@ def __init__(
chat_template=None,
image_token="<image>",
video_token="<video>",
-**kwargs: Unpack[LlavaOnevisionProcessorKwargs],
+**kwargs,
):
self.num_image_tokens = num_image_tokens
self.vision_feature_select_strategy = vision_feature_select_strategy
@@ -109,7 +115,7 @@ def __call__(
images: ImageInput = None,
text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
videos: VideoInput = None,
-**kwargs,
+**kwargs: Unpack[LlavaOnevisionProcessorKwargs],
) -> BatchFeature:
"""
Main method to prepare one or several sequence(s) and image(s) for the model. This method forwards the `text`
@@ -272,3 +278,47 @@ def model_input_names(self):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))

# override to save video-config in a separate config file
def save_pretrained(self, save_directory, **kwargs):
if os.path.isfile(save_directory):
raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
os.makedirs(save_directory, exist_ok=True)
video_processor_path = os.path.join(save_directory, "video_processor")
self.video_processor.save_pretrained(video_processor_path)

video_processor_present = "video_processor" in self.attributes
if video_processor_present:
self.attributes.remove("video_processor")

outputs = super().save_pretrained(save_directory, **kwargs)

if video_processor_present:
self.attributes += ["video_processor"]
return outputs

# override to load video-config from a separate config file
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)

# if return_unused_kwargs=True, a tuple is returned where the second element is 'unused_kwargs'
if isinstance(processor, tuple):
processor = processor[0]

try:
video_processor = AutoImageProcessor.from_pretrained(
pretrained_model_name_or_path, subfolder="video_processor"
)
processor.video_processor = video_processor
except EnvironmentError:
# this means the processor was saved with a previous version, where there was only one preprocessor_config.json
# loading that format back should still work and return a LlavaOnevisionVideoProcessor class
logger.info(
"You are loading `LlavaOnevisionProcessor` but the indicated `path` doesn't contain a folder called "
"`video_processor`. It is strongly recommended to load and save the processor again so the video processor is saved "
"in a separate config."
)
pass

return processor
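
For context, a hedged sketch of the round trip these two overrides enable; the checkpoint id and output directory are placeholders, and the on-disk layout is an assumption based on the usual ProcessorMixin conventions:

# Hypothetical usage sketch -- checkpoint id and output directory are placeholders.
from transformers import LlavaOnevisionProcessor

processor = LlavaOnevisionProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-7b-ov-hf")
processor.save_pretrained("./llava_onevision_ckpt")
# Assumed layout after saving:
#   llava_onevision_ckpt/
#       preprocessor_config.json        <- image processor config
#       tokenizer files, chat template, ...
#       video_processor/
#           preprocessor_config.json    <- video processor config written by the override above

reloaded = LlavaOnevisionProcessor.from_pretrained("./llava_onevision_ckpt")
print(type(reloaded.video_processor).__name__)  # expected: LlavaOnevisionVideoProcessor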
tests/models/llava_onevision/test_processing_llava_onevision.py (21 changes: 11 additions & 10 deletions)
@@ -58,15 +58,16 @@ def get_video_processor(self, **kwargs):
return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).video_processor

def prepare_processor_dict(self):
return {"chat_template": "dummy_template"}
return {"chat_template": "dummy_template", "num_image_tokens": 6, "vision_feature_select_strategy": "default"}

-@unittest.skip(
-"Skip because the model has no processor kwargs except for chat template and"
-"chat template is saved as a separate file. Stop skipping this test when the processor"
-"has new kwargs saved in config file."
-)
def test_processor_to_json_string(self):
-pass
+processor = self.get_processor()
+obj = json.loads(processor.to_json_string())
+for key, value in self.prepare_processor_dict().items():
+# chat_template is tested in a separate test because it is saved in its own file
+if key != "chat_template":
+self.assertEqual(obj[key], value)
+self.assertEqual(getattr(processor, key, None), value)

# Copied from tests.models.llava.test_processor_llava.LlavaProcessorTest.test_chat_template_is_saved
def test_chat_template_is_saved(self):
@@ -191,7 +192,7 @@ def test_unstructured_kwargs_batched(self):
max_length=76,
)
self.assertEqual(inputs["pixel_values"].shape[3], 214)
-self.assertEqual(len(inputs["input_ids"][0]), 5)
+self.assertEqual(len(inputs["input_ids"][0]), 4)

@require_torch
@require_vision
@@ -282,7 +283,7 @@ def test_kwargs_overrides_default_tokenizer_kwargs(self):
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, return_tensors="pt", max_length=112)
-self.assertEqual(len(inputs["input_ids"][0]), 112)
+self.assertEqual(len(inputs["input_ids"][0]), 2)

@require_vision
@require_torch
@@ -299,4 +300,4 @@ def test_tokenizer_defaults_preserved_by_kwargs(self):
image_input = self.prepare_image_inputs()

inputs = processor(text=input_str, images=image_input, return_tensors="pt")
-self.assertEqual(len(inputs["input_ids"][0]), 117)
+self.assertEqual(len(inputs["input_ids"][0]), 2)
tests/test_processing_common.py (8 changes: 8 additions & 0 deletions)
@@ -112,6 +112,14 @@ def test_processor_from_and_save_pretrained(self):

self.assertEqual(processor_second.to_dict(), processor_first.to_dict())

for attribute in processor_first.attributes:
attribute_first = getattr(processor_first, attribute)
attribute_second = getattr(processor_second, attribute)

# the tokenizer repr contains the model path it was loaded from
if "tokenizer" not in attribute:
self.assertEqual(repr(attribute_first), repr(attribute_second))

# These kwargs-related tests ensure that processors are correctly instantiated.
# they need to be applied only if an image_processor exists.
