Rename triton_fused_moe -> fused_moe_triton
merrymercy committed Nov 24, 2024
1 parent fe5d3e8 commit d751006
Showing 76 changed files with 19 additions and 19 deletions.
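In short, the commit renames two packages and updates every import accordingly: sglang.srt.layers.triton_fused_moe becomes sglang.srt.layers.fused_moe_triton, and the Grok-specific sglang.srt.layers.fused_moe becomes sglang.srt.layers.fused_moe_grok. A minimal before/after sketch of what this means for code importing these modules (names taken from the diff below; the GrokFusedMoE alias is purely illustrative):

# Before this commit:
# from sglang.srt.layers.triton_fused_moe import FusedMoE, fused_moe
# from sglang.srt.layers.fused_moe import FusedMoE  # Grok-specific variant

# After this commit:
from sglang.srt.layers.fused_moe_triton import FusedMoE, fused_moe
from sglang.srt.layers.fused_moe_grok import FusedMoE as GrokFusedMoE  # alias only to avoid a name clash in this sketch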
1 change: 0 additions & 1 deletion python/sglang/srt/layers/fused_moe/__init__.py

This file was deleted.

1 change: 1 addition & 0 deletions python/sglang/srt/layers/fused_moe_grok/__init__.py
@@ -0,0 +1 @@
+from sglang.srt.layers.fused_moe_grok.layer import FusedMoE, FusedMoEMethodBase
@@ -20,7 +20,7 @@
from vllm.model_executor.layers.quantization.fp8 import Fp8Config
from vllm.model_executor.utils import set_weight_attrs

-from sglang.srt.layers.fused_moe.fused_moe import padding_size
+from sglang.srt.layers.fused_moe_grok.fused_moe import padding_size
from sglang.srt.utils import is_hip

logger = init_logger(__name__)
@@ -123,7 +123,7 @@ def forward_cuda(
num_expert_group: Optional[int],
topk_group: Optional[int],
) -> torch.Tensor:
-from sglang.srt.layers.fused_moe.fused_moe import fused_moe
+from sglang.srt.layers.fused_moe_grok.fused_moe import fused_moe

return fused_moe(
x,
@@ -609,7 +609,7 @@ def apply(
topk_group: Optional[int] = None,
) -> torch.Tensor:

-from sglang.srt.layers.fused_moe.fused_moe import fused_moe
+from sglang.srt.layers.fused_moe_grok.fused_moe import fused_moe

return fused_moe(
x,
@@ -1,14 +1,14 @@
from contextlib import contextmanager
from typing import Any, Dict, Optional

-import sglang.srt.layers.triton_fused_moe.fused_moe # noqa
-from sglang.srt.layers.triton_fused_moe.fused_moe import (
+import sglang.srt.layers.fused_moe_triton.fused_moe # noqa
+from sglang.srt.layers.fused_moe_triton.fused_moe import (
fused_experts,
fused_topk,
get_config_file_name,
grouped_topk,
)
-from sglang.srt.layers.triton_fused_moe.layer import (
+from sglang.srt.layers.fused_moe_triton.layer import (
FusedMoE,
FusedMoEMethodBase,
FusedMoeWeightScaleSupported,
@@ -376,7 +376,7 @@ def try_get_optimal_moe_config(
M: int,
is_marlin: bool = False,
):
-from sglang.srt.layers.triton_fused_moe import get_config
+from sglang.srt.layers.fused_moe_triton import get_config

override_config = get_config()
if override_config:
@@ -20,7 +20,7 @@
from sglang.srt.utils import set_weight_attrs

if torch.cuda.is_available() or torch.hip.is_available():
-from sglang.srt.layers.triton_fused_moe.fused_moe import fused_experts
+from sglang.srt.layers.fused_moe_triton.fused_moe import fused_experts
else:
fused_experts = None # type: ignore

@@ -514,7 +514,7 @@ def select_experts(
num_expert_group: Optional[int] = None,
custom_routing_function: Optional[Callable] = None,
):
-from sglang.srt.layers.triton_fused_moe.fused_moe import (
+from sglang.srt.layers.fused_moe_triton.fused_moe import (
fused_topk,
grouped_topk,
)
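For reference, a short sketch of calling the top-k routing helper from the renamed package, as select_experts above does. This assumes the fused_topk signature matches the vLLM implementation these kernels derive from (hidden_states, gating_output, topk, renormalize) and that a CUDA device is available; the tensor sizes are illustrative only:

import torch

from sglang.srt.layers.fused_moe_triton.fused_moe import fused_topk

# Illustrative sizes: 4 tokens, hidden size 128, 8 experts, top-2 routing.
hidden_states = torch.randn(4, 128, device="cuda", dtype=torch.float16)
router_logits = torch.randn(4, 8, device="cuda", dtype=torch.float16)

# Assumed to return per-token routing weights and expert ids, each shaped [num_tokens, topk].
topk_weights, topk_ids = fused_topk(
    hidden_states=hidden_states,
    gating_output=router_logits,
    topk=2,
    renormalize=True,
)
print(topk_weights.shape, topk_ids.shape)  # expected: torch.Size([4, 2]) torch.Size([4, 2])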
2 changes: 1 addition & 1 deletion python/sglang/srt/layers/quantization/__init__.py
@@ -68,7 +68,7 @@ def fp8_get_quant_method(self, layer, prefix):
is_layer_skipped,
)

-from sglang.srt.layers.triton_fused_moe.layer import FusedMoE
+from sglang.srt.layers.fused_moe_triton.layer import FusedMoE

if isinstance(layer, LinearBase):
if is_layer_skipped(prefix, self.ignored_layers):
2 changes: 1 addition & 1 deletion python/sglang/srt/models/dbrx.py
@@ -28,6 +28,7 @@
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.transformers_utils.configs.dbrx import DbrxConfig

+from sglang.srt.layers.fused_moe_triton import fused_moe
from sglang.srt.layers.linear import (
QKVParallelLinear,
ReplicatedLinear,
@@ -36,7 +37,6 @@
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
from sglang.srt.layers.vocab_parallel_embedding import (
DEFAULT_VOCAB_PADDING_SIZE,
ParallelLMHead,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/deepseek.py
@@ -30,6 +30,7 @@
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import fused_moe
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
MergedColumnParallelLinear,
@@ -40,7 +41,6 @@
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/deepseek_v2.py
@@ -31,6 +31,7 @@
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
ColumnParallelLinear,
@@ -41,7 +42,6 @@
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import FusedMoE
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/grok.py
@@ -31,7 +31,7 @@
from vllm.model_executor.model_loader.loader import DefaultModelLoader
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

-from sglang.srt.layers.fused_moe import FusedMoE
+from sglang.srt.layers.fused_moe_grok import FusedMoE
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
QKVParallelLinear,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/mixtral.py
@@ -25,6 +25,7 @@
from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

+from sglang.srt.layers.fused_moe_triton import FusedMoE
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
QKVParallelLinear,
@@ -35,7 +36,6 @@
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.torchao_utils import apply_torchao_config_
-from sglang.srt.layers.triton_fused_moe import FusedMoE
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/olmoe.py
@@ -38,11 +38,11 @@
from vllm.utils import print_warning_once

from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.logits_processor import LogitsProcessor, LogitsProcessorOutput
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import FusedMoE
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/qwen2_moe.py
@@ -30,6 +30,7 @@
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

from sglang.srt.layers.activation import SiluAndMul
+from sglang.srt.layers.fused_moe_triton import FusedMoE
from sglang.srt.layers.layernorm import RMSNorm
from sglang.srt.layers.linear import (
MergedColumnParallelLinear,
@@ -41,7 +42,6 @@
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.torchao_utils import apply_torchao_config_
-from sglang.srt.layers.triton_fused_moe import FusedMoE
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
2 changes: 1 addition & 1 deletion python/sglang/srt/models/xverse_moe.py
@@ -34,10 +34,10 @@
from vllm.model_executor.layers.rotary_embedding import get_rope
from vllm.model_executor.model_loader.weight_utils import default_weight_loader

+from sglang.srt.layers.fused_moe_triton import fused_moe
from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
-from sglang.srt.layers.triton_fused_moe import fused_moe
from sglang.srt.layers.vocab_parallel_embedding import (
ParallelLMHead,
VocabParallelEmbedding,
