Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions trl/experimental/dppo/dppo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,23 +246,28 @@ def _tokenize_prompts(self, prompts: list):
images.append(prompt_images if prompt_images else None)
images = images if has_images else None

# We pass padding=True to work around a bug introduced in transformers 5.2.0 in some processors
# (e.g. Qwen2.5-VL) that crash on batched unpadded input. We then unpad input_ids using attention_mask.
# See: https://github.com/huggingface/transformers/issues/44514
# Workaround for a bug in transformers 5.3.0 where some processors (e.g. Qwen2.5-VL) crash on
# batched unpadded input (transformers#44514).
# Fixed in transformers 5.4.0 (transformers#44563).
needs_padding_workaround = Version("5.3.0") <= Version(transformers.__version__) < Version("5.4.0")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So we were wrong: it was introduced in 5.3, not 5.2?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I tested locally.

The original comment also incorrectly attributed the bug to transformers 5.2.0; it was actually introduced in 5.3.0.

tokenized = self.processing_class.apply_chat_template(
conversation=prompts,
tools=self.tools or None, # `or None`: Llama bug: it renders tool boilerplate for tools=[]
chat_template=self.chat_template,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
padding=True,
**({"padding": True} if needs_padding_workaround else {}),
**self.chat_template_kwargs,
)
prompt_ids = [
[tok for tok, mask in zip(ids, attention_mask, strict=True) if mask]
for ids, attention_mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
if needs_padding_workaround:
# Unpad input_ids: remove padding tokens using attention_mask to get per-sequence lists
prompt_ids = [
[tok for tok, m in zip(ids, mask, strict=True) if m]
for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
else:
prompt_ids = tokenized["input_ids"]
multimodal_fields = {k: v for k, v in tokenized.items() if k not in ("input_ids", "attention_mask")}
else:
prompt_ids = self.processing_class(text=prompts)["input_ids"]
Expand Down
22 changes: 13 additions & 9 deletions trl/trainer/grpo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,24 +1289,28 @@ def _tokenize_prompts(self, prompts: list):
images.append(prompt_images if prompt_images else None)
images = images if has_images else None

# We pass padding=True to work around a bug introduced in transformers 5.2.0 in some processors
# (e.g. Qwen2.5-VL) that crash on batched unpadded input. We then unpad input_ids using attention_mask.
# See: https://github.com/huggingface/transformers/issues/44514
# Workaround for a bug in transformers 5.3.0 where some processors (e.g. Qwen2.5-VL) crash on
# batched unpadded input (transformers#44514).
# Fixed in transformers 5.4.0 (transformers#44563).
needs_padding_workaround = Version("5.3.0") <= Version(transformers.__version__) < Version("5.4.0")
tokenized = self.processing_class.apply_chat_template(
conversation=prompts,
tools=self.tools or None, # `or None`: Llama bug: it renders tool boilerplate for tools=[]
chat_template=self.chat_template,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
padding=True,
**({"padding": True} if needs_padding_workaround else {}),
**self.chat_template_kwargs,
)
# Unpad input_ids: remove padding tokens using attention_mask to get per-sequence lists
prompt_ids = [
[tok for tok, m in zip(ids, mask, strict=True) if m]
for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
if needs_padding_workaround:
# Unpad input_ids: remove padding tokens using attention_mask to get per-sequence lists
prompt_ids = [
[tok for tok, m in zip(ids, mask, strict=True) if m]
for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
else:
prompt_ids = tokenized["input_ids"]
# For VLMs, the processor returns extra multimodal fields (pixel_values, image_grid_thw, etc.)
multimodal_fields = {k: v for k, v in tokenized.items() if k not in ("input_ids", "attention_mask")}
else:
Expand Down
22 changes: 13 additions & 9 deletions trl/trainer/rloo_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,22 +919,26 @@ def _tokenize_prompts(self, prompts: list):
images.append(prompt_images if prompt_images else None)
images = images if has_images else None

# We pass padding=True to work around a bug introduced in transformers 5.2.0 in some processors
# (e.g. Qwen2.5-VL) that crash on batched unpadded input. We then unpad input_ids using attention_mask.
# See: https://github.com/huggingface/transformers/issues/44514
# Workaround for a bug in transformers 5.3.0 where some processors (e.g. Qwen2.5-VL) crash on
# batched unpadded input (transformers#44514).
# Fixed in transformers 5.4.0 (transformers#44563).
needs_padding_workaround = Version("5.3.0") <= Version(transformers.__version__) < Version("5.4.0")
tokenized = self.processing_class.apply_chat_template(
conversation=prompts,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
padding=True,
**({"padding": True} if needs_padding_workaround else {}),
**self.chat_template_kwargs,
)
# Unpad input_ids: remove padding tokens using attention_mask to get per-sequence lists
prompt_ids = [
[tok for tok, m in zip(ids, mask, strict=True) if m]
for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
if needs_padding_workaround:
# Unpad input_ids: remove padding tokens using attention_mask to get per-sequence lists
prompt_ids = [
[tok for tok, m in zip(ids, mask, strict=True) if m]
for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"], strict=True)
]
else:
prompt_ids = tokenized["input_ids"]
# For VLMs, the processor returns extra multimodal fields (pixel_values, image_grid_thw, etc.)
multimodal_fields = {k: v for k, v in tokenized.items() if k not in ("input_ids", "attention_mask")}
else:
Expand Down
Loading