diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py
index 4c2a6c6b985b..f5cc7e9762de 100755
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -759,6 +759,9 @@ def cascade_attention(
         if v_descale is not None else None,
     )
 
+    prefix_lse = prefix_lse.contiguous()
+    suffix_lse = suffix_lse.contiguous()
+
     # Merge prefix and suffix outputs, and store the result in output.
     merge_attn_states(output, prefix_output, prefix_lse, suffix_output,
                       suffix_lse)
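The diff suggests that merge_attn_states assumes its LSE inputs are contiguous in memory (e.g., because it computes element offsets from raw strides), while the tensors produced upstream can arrive as non-contiguous views. Below is a minimal stand-alone sketch, not vLLM code: the shapes and the transpose are assumptions chosen only to show how a view loses contiguity and how .contiguous() restores a dense layout.

    import torch

    # Hypothetical shapes for illustration: an LSE tensor laid out as
    # [num_heads, num_tokens], then viewed as [num_tokens, num_heads].
    lse = torch.randn(8, 16)          # dense, row-major
    lse_view = lse.transpose(0, 1)    # a view: strides are swapped, no copy

    print(lse_view.is_contiguous())   # False: memory order no longer matches shape

    # .contiguous() materializes a dense copy in the viewed layout, which is
    # what a kernel indexing as `row * stride + col` expects.
    lse_fixed = lse_view.contiguous()
    print(lse_fixed.is_contiguous())  # True

The .contiguous() call is a no-op (returns the same tensor) when the input is already contiguous, so adding it unconditionally before the merge only costs a copy in the non-contiguous case.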