Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/sglang/srt/layers/attention/wave_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import triton
import triton.language as tl

-from sglang.srt.layers.attention import AttentionBackend
+from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
from sglang.srt.layers.attention.utils import create_flashinfer_kv_indices_triton
from sglang.srt.layers.dp_attention import get_attention_tp_size
from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode
Expand Down Expand Up @@ -88,7 +88,7 @@ class WaveAttnBackend(AttentionBackend):
def __init__(
self,
model_runner: ModelRunner,
-skip_prefill: bool,
+skip_prefill: bool = False,
kv_indptr_buf: Optional[torch.Tensor] = None,
):
# Lazy import to avoid the initialization of cuda context
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/layers/quantization/fp8.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ def process_weights_hip_int4(self, layer: Module):
layer.w13_weight_scale1[expert_id] *= max_w13_scales[expert_id]
layer.w2_weight_scale1[expert_id] *= layer.w2_weight_scale[expert_id]

-def process_weights_hip_scale_padding(self, layer: Module, padding_size: int):
+def process_weights_hip_scale_padding(self, layer: Module, padding_size: int=0):
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
padding_size, # Avoid circular import
)
Expand Down
Loading