From 94697a50af93b98420bd1a4719952fd7901e9fc2 Mon Sep 17 00:00:00 2001 From: Paul Zhang Date: Tue, 8 Jul 2025 12:47:40 -0500 Subject: [PATCH] Remove `mha` param from Wave decode attention kernel Depends on https://github.com/iree-org/iree-turbine/pull/1039 Signed-off-by: Paul Zhang --- python/sglang/srt/layers/attention/wave_ops/decode_attention.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/sglang/srt/layers/attention/wave_ops/decode_attention.py b/python/sglang/srt/layers/attention/wave_ops/decode_attention.py index aadcb3079991..cf804780f63d 100644 --- a/python/sglang/srt/layers/attention/wave_ops/decode_attention.py +++ b/python/sglang/srt/layers/attention/wave_ops/decode_attention.py @@ -78,7 +78,6 @@ def get_wave_kernel( max_kv_splits, input_dtype=input_dtype, output_dtype=output_dtype, - mha=mha, logit_cap=logit_cap, ) hyperparams_0.update(get_default_scheduling_params())