diff --git a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
index 5cfaac9e4418..c121e663ae83 100644
--- a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
+++ b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
@@ -649,11 +649,9 @@ def forward_extend(
         if is_target_verify:
             batch_size = seq_len // forward_batch.spec_info.draft_token_num
             draft_token_num = forward_batch.spec_info.draft_token_num
-            mixed_qkv_reshaped = (
-                mixed_qkv.view(batch_size, draft_token_num, -1)
-                .transpose(1, 2)
-                .contiguous()
-            )
+            mixed_qkv_reshaped = mixed_qkv.view(
+                batch_size, draft_token_num, -1
+            ).transpose(1, 2)
             mixed_qkv_processed = causal_conv1d_update(
                 mixed_qkv_reshaped,
                 conv_states_to_use,
@@ -666,9 +664,7 @@
                 retrieve_next_sibling=retrieve_next_sibling,
                 retrieve_parent_token=retrieve_parent_token,
             )
-            mixed_qkv = (
-                mixed_qkv_processed.transpose(1, 2).contiguous().view(seq_len, -1)
-            )
+            mixed_qkv = mixed_qkv_processed.transpose(1, 2).view(seq_len, -1)
         else:
             mixed_qkv = causal_conv1d_fn(
                 mixed_qkv.transpose(0, 1),
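
For context, a minimal sketch of why dropping the two `.contiguous()` copies can be safe here, assuming `causal_conv1d_update` tolerates a strided channel-last input and preserves its layout (the `mul_` below is a hypothetical stand-in for that kernel; shapes are illustrative, not taken from the PR):

```python
import torch

# Illustrative shapes only; not taken from the PR.
batch_size, draft_token_num, dim = 2, 4, 8
seq_len = batch_size * draft_token_num
mixed_qkv = torch.randn(seq_len, dim)

# view + transpose creates a channel-last *view*: no data is copied,
# but the result is non-contiguous.
reshaped = mixed_qkv.view(batch_size, draft_token_num, -1).transpose(1, 2)
assert not reshaped.is_contiguous()

# Stand-in for causal_conv1d_update (assumption: it accepts strided
# input and preserves the input's layout, e.g. by updating in place).
processed = reshaped.mul_(2.0)

# Transposing back undoes the permutation, so the tensor is contiguous
# again and .view() succeeds without an intermediate copy.
out = processed.transpose(1, 2)
assert out.is_contiguous()
mixed_qkv_out = out.view(seq_len, -1)
```

If that assumption about the kernel holds, the round-trip transpose restores a contiguous layout, so the final `.view(seq_len, -1)` still succeeds and two tensor materializations are avoided on the target-verify path.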