diff --git a/src/transformers/models/modernbert/modeling_modernbert.py b/src/transformers/models/modernbert/modeling_modernbert.py index c363eaefcf3c..727640ac87c8 100644 --- a/src/transformers/models/modernbert/modeling_modernbert.py +++ b/src/transformers/models/modernbert/modeling_modernbert.py @@ -905,6 +905,8 @@ def forward( inputs_embeds, indices, cu_seqlens, max_seqlen, *_ = _unpad_modernbert_input( inputs=inputs_embeds, attention_mask=attention_mask ) + if position_ids is None: + position_ids = indices.unsqueeze(0) else: if position_ids is None: position_ids = torch.arange(seq_len, device=device).unsqueeze(0) diff --git a/src/transformers/models/modernbert/modular_modernbert.py b/src/transformers/models/modernbert/modular_modernbert.py index 9e535d345f2f..6dd1a547a320 100644 --- a/src/transformers/models/modernbert/modular_modernbert.py +++ b/src/transformers/models/modernbert/modular_modernbert.py @@ -1014,6 +1014,8 @@ def forward( inputs_embeds, indices, cu_seqlens, max_seqlen, *_ = _unpad_modernbert_input( inputs=inputs_embeds, attention_mask=attention_mask ) + if position_ids is None: + position_ids = indices.unsqueeze(0) else: if position_ids is None: position_ids = torch.arange(seq_len, device=device).unsqueeze(0)