diff --git a/paddlenlp/transformers/llama/modeling.py b/paddlenlp/transformers/llama/modeling.py
index 4c326cc89c00..93014e9917c7 100644
--- a/paddlenlp/transformers/llama/modeling.py
+++ b/paddlenlp/transformers/llama/modeling.py
@@ -239,6 +239,7 @@ def scaled_dot_product_attention(
             attention_mask is None,
             True,
             False,
+            False,
         )[0]
     else:
         attn_output = F.scaled_dot_product_attention(