Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ARG TRITON_COMMIT="improve_fa_decode_3.0.0"


ARG AITER_REPO="https://github.com/ROCm/aiter.git"
ARG AITER_COMMIT="v0.1.2"
ARG AITER_COMMIT="v0.1.3"

RUN git clone ${SGL_REPO} \
&& cd sglang \
Expand Down
3 changes: 2 additions & 1 deletion python/sglang/srt/layers/moe/fused_moe_triton/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

if _use_aiter:
from aiter import ActivationType
from aiter.fused_moe import fused_moe
from aiter.fused_moe_bf16_asm import ck_moe_2stages
from aiter.ops.shuffle import shuffle_weight

Expand Down Expand Up @@ -204,7 +205,7 @@ def forward_cuda(
topk_weights, dtype=torch.float32
) # topk_weights must be FP32 (float32)

return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
Expand Down
16 changes: 8 additions & 8 deletions python/sglang/srt/layers/quantization/fp8.py
Original file line number Diff line number Diff line change
Expand Up @@ -1052,15 +1052,15 @@ def maybe_apply_hip_fused_experts(
if _use_hip_int4:
# TODO: add triton kernel and add check _use_aiter
assert not no_combine, f"{no_combine=} is not supported."
return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
topk_weights,
topk_ids,
QuantType.per_Token,
layer.w13_weight_scale1,
layer.w2_weight_scale1,
quant_type=QuantType.per_Token,
w1_scale=layer.w13_weight_scale1,
w2_scale=layer.w2_weight_scale1,
activation=(
ActivationType.Silu if activation == "silu" else ActivationType.Gelu
),
Expand All @@ -1086,15 +1086,15 @@ def maybe_apply_hip_fused_experts(
expert_mask=None,
)
else:
return ck_moe_2stages(
return fused_moe(
x,
layer.w13_weight,
layer.w2_weight,
topk_weights,
topk_ids,
QuantType.per_Token,
layer.w13_weight_scale1,
layer.w2_weight_scale1,
quant_type=QuantType.per_Token,
w1_scale=layer.w13_weight_scale1,
w2_scale=layer.w2_weight_scale1,
activation=(
ActivationType.Silu
if activation == "silu"
Expand Down
Loading