Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/transformers/feature_extraction_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def __init__(
skip_tensor_conversion: list[str] | set[str] | None = None,
):
super().__init__(data)
self.convert_to_tensors(tensor_type=tensor_type, skip_tensor_conversion=skip_tensor_conversion)
self.skip_tensor_conversion = skip_tensor_conversion
self.convert_to_tensors(tensor_type=tensor_type)

def __getitem__(self, item: str) -> Any:
"""
Expand Down Expand Up @@ -178,6 +179,9 @@ def convert_to_tensors(
return self

is_tensor, as_tensor = self._get_is_as_tensor_fns(tensor_type)
skip_tensor_conversion = (
skip_tensor_conversion if skip_tensor_conversion is not None else self.skip_tensor_conversion
)

# Do the tensor conversion in batch
for key, value in self.items():
Expand Down
40 changes: 40 additions & 0 deletions src/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2312,6 +2312,14 @@ def _initialize_weights(self, module):
if getattr(module, "_is_hf_initialized", False):
return

if (
(weight := getattr(module, "weight", None)) is not None
and getattr(weight, "_is_hf_initialized", False)
and not list(module.named_buffers())
):
module._is_hf_initialized = True
return

self._init_weights(module)
module._is_hf_initialized = True

Expand Down Expand Up @@ -4202,6 +4210,9 @@ def _finalize_model_loading(
missing keys from meta device to their expected device, reinitializing missing weights according to proper
distributions, tying the weights and logging the loading report."""
try:
# Adjust `all_tied_weights_keys` before marking them as initialized
model._adjust_tied_keys_with_tied_pointers(loading_info.missing_and_mismatched())

# Marks tied weights as `_is_hf_initialized` to avoid initializing them (it's very important for efficiency)
model.mark_tied_weights_as_initialized()

Expand Down Expand Up @@ -4417,6 +4428,35 @@ def get_compiled_call(self, compile_config: CompileConfig | None) -> Callable:
def is_backend_compatible(cls):
return cls._supports_attention_backend

def _adjust_tied_keys_with_tied_pointers(self, missing_keys: list[str]) -> None:
"""
Adds keys to `self.all_tied_weights_keys` by checking if any group of params
share the same data ptr. It helps us support remote code where the weight tying is
done in old-T5 style, by manually assigning the same module to different param names.
If we don't add them back in `self.all_tied_weights_keys`, they will be re-initialized
and all params in tied group get random weights.
"""
param_pointers = defaultdict(list)
for param_name, param_value in self.state_dict().items():
param_pointers[param_value.data_ptr()].append(param_name)

# Filter out params that are already in `self.all_tied_weights_keys` or if all
# are missing params. Missing param groups share the same data ptr by being on `meta`
tied_param_names = [
names
for names in param_pointers.values()
if len(names) > 1
and not any(name in self.all_tied_weights_keys.keys() for name in names)
and not all(name in missing_keys for name in names)
]

# Create a dummy mapping, it doesn't matter which one is source/target
# because they are already tied
tied_weights_keys_by_pointers = {
param_name: group[0] for group in tied_param_names for param_name in group[1:]
}
self.all_tied_weights_keys.update(tied_weights_keys_by_pointers)

def _move_missing_keys_from_meta_to_device(
self,
missing_keys: list[str],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,8 @@ class Qwen2_5OmniTextConfig(PreTrainedConfig):
with longer `max_position_embeddings`.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
Whether to tie the input and output word embeddings.

Example:

Expand Down Expand Up @@ -328,6 +330,7 @@ def __init__(
pad_token_id: int | None = None,
bos_token_id: int | None = None,
eos_token_id: int | None = None,
tie_word_embeddings: bool | None = True,
**kwargs,
):
self.vocab_size = vocab_size
Expand All @@ -342,6 +345,7 @@ def __init__(
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings

# for backward compatibility
if num_key_value_heads is None:
Expand Down
4 changes: 4 additions & 0 deletions src/transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ class Qwen2_5OmniTextConfig(PreTrainedConfig):
with longer `max_position_embeddings`.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
Whether to tie the input and output word embeddings.

Example:

Expand Down Expand Up @@ -368,6 +370,7 @@ def __init__(
pad_token_id: int | None = None,
bos_token_id: int | None = None,
eos_token_id: int | None = None,
tie_word_embeddings: bool | None = True,
**kwargs,
):
self.vocab_size = vocab_size
Expand All @@ -382,6 +385,7 @@ def __init__(
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
self.eos_token_id = eos_token_id
self.tie_word_embeddings = tie_word_embeddings

# for backward compatibility
if num_key_value_heads is None:
Expand Down
4 changes: 1 addition & 3 deletions src/transformers/processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,13 @@ class methods and docstrings.

Attributes:
do_convert_rgb (`bool`):
Whether to convert the video to RGB format.
Whether to convert the image to RGB format.
do_resize (`bool`, *optional*):
Whether to resize the image.
size (`dict[str, int]`, *optional*):
Resize the shorter side of the input to `size["shortest_edge"]`.
crop_size (`dict[str, int]`, *optional*):
Desired output size when applying center-cropping.
do_convert_rgb (`bool`):
Whether to convert the video to RGB format.
resample (`PILImageResampling`, *optional*):
Resampling filter to use if resizing the image.
do_rescale (`bool`, *optional*):
Expand Down
34 changes: 33 additions & 1 deletion src/transformers/tokenization_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def __init__(self, **kwargs):

# 5. Special tokens mask configuration
# Patterns: "none", "cls_sep", "eos", "bos", "bos_eos", "cls_double_sep", "prefix_suffix"
self.special_tokens_pattern = kwargs.pop("special_tokens_pattern", "cls_sep")
self.special_tokens_pattern = kwargs.pop("special_tokens_pattern", None)

# 6. Set backend to "custom" if not already set (for direct PreTrainedTokenizer subclasses)
if "backend" not in kwargs:
Expand Down Expand Up @@ -883,30 +883,62 @@ def build_inputs_with_special_tokens(
"""
if self.special_tokens_pattern == "cls_sep":
# [CLS] seq0 [SEP] or [CLS] seq0 [SEP] seq1 [SEP]
if self.cls_token_id is None and self.sep_token_id is None:
raise ValueError(
"Cannot add special tokens following 'cls_sep' pattern because one or several special tokens "
f"are not defined (cls_token_id={self.cls_token_id}; sep_token_id={self.sep_token_id})"
"Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`"
)
if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id] + token_ids_1 + [self.sep_token_id]

elif self.special_tokens_pattern == "eos":
# seq0 [EOS] or seq0 [EOS] seq1 [EOS]
if self.eos_token_id is None:
raise ValueError(
"Cannot add special tokens following 'eos' pattern because eos token is not defined "
f"(eos_token_id={self.eos_token_id})."
"Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`"
)
if token_ids_1 is None:
return token_ids_0 + [self.eos_token_id]
return token_ids_0 + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

elif self.special_tokens_pattern == "bos":
# [BOS] seq0 or [BOS] seq0 [BOS] seq1
if self.bos_token_id is None:
raise ValueError(
"Cannot add special tokens following 'bos' pattern because bos token is not defined "
f"(bos_token_id={self.bos_token_id})."
"Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`"
)
if token_ids_1 is None:
return [self.bos_token_id] + token_ids_0
return [self.bos_token_id] + token_ids_0 + [self.bos_token_id] + token_ids_1

elif self.special_tokens_pattern == "bos_eos":
# [BOS] seq0 [EOS] or [BOS] seq0 [EOS] seq1 [EOS]
if self.bos_token_id is None and self.eos_token_id is None:
raise ValueError(
"Cannot add special tokens following 'bos_eos' pattern because one or several special tokens "
f"are not defined (bos_token_id={self.bos_token_id}; eos_token_id={self.eos_token_id})"
"Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`"
)
return token_ids_0 if token_ids_1 is None else token_ids_0 + token_ids_1

if token_ids_1 is None:
return [self.bos_token_id] + token_ids_0 + [self.eos_token_id]
return [self.bos_token_id] + token_ids_0 + [self.eos_token_id] + token_ids_1 + [self.eos_token_id]

elif self.special_tokens_pattern == "cls_double_sep":
# [CLS] seq0 [SEP] or [CLS] seq0 [SEP] [SEP] seq1 [SEP]
if self.cls_token_id is None and self.sep_token_id is None:
raise ValueError(
"Cannot add special tokens following 'cls_double_sep' pattern because one or several special tokens "
f"are not defined (cls_token_id={self.cls_token_id}; sep_token_id={self.sep_token_id})"
"Set the required special tokens in tokenizer or update `tokenizer.special_tokens_pattern`"
)
if token_ids_1 is None:
return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
return (
Expand Down