3 changes: 3 additions & 0 deletions vllm/model_executor/models/aimv2.py
@@ -23,6 +23,9 @@
from vllm.model_executor.model_loader.weight_utils import default_weight_loader


+# NOTE: The Aimv2Config used here is defined by Ovis
+# (https://huggingface.co/AIDC-AI/Ovis2-1B/tree/main)
+# It is different from the one inside Transformers library
class AIMv2SwiGLUFFN(nn.Module):

def __init__(self, config: PretrainedConfig,
14 changes: 7 additions & 7 deletions vllm/model_executor/models/dbrx.py
@@ -6,7 +6,7 @@

import torch
import torch.nn as nn
-from transformers import PretrainedConfig
+from transformers import DbrxConfig

from vllm.attention import Attention
from vllm.config import CacheConfig, VllmConfig
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
params_dtype: Optional[torch.dtype] = None,
):
super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
quant_config: Optional[QuantizationConfig] = None,
params_dtype: Optional[torch.dtype] = None,
prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
quant_config: Optional[QuantizationConfig] = None,
params_dtype: Optional[torch.dtype] = None,
prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
14 changes: 8 additions & 6 deletions vllm/model_executor/models/deepseek_v2.py
@@ -29,7 +29,7 @@

import torch
from torch import nn
-from transformers import PretrainedConfig
+from transformers import DeepseekV3Config

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DeepseekV3Config,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
enable_eplb: bool = False,
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DeepseekV3Config,
hidden_size: int,
num_heads: int,
qk_nope_head_dim: int,
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DeepseekV3Config,
hidden_size: int,
num_heads: int,
qk_nope_head_dim: int,
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: DeepseekV3Config,
prefix: str,
model_config: ModelConfig,
cache_config: Optional[CacheConfig] = None,
@@ -971,7 +971,9 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass


-def get_spec_layer_idx_from_weight_name(config: PretrainedConfig,
+# Compatibility with
+# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
+def get_spec_layer_idx_from_weight_name(config: DeepseekV3Config,
weight_name: str) -> Optional[int]:
if (hasattr(config, "num_nextn_predict_layers")
and config.num_nextn_predict_layers > 0):
8 changes: 4 additions & 4 deletions vllm/model_executor/models/dots1.py
@@ -29,7 +29,7 @@

import torch
from torch import nn
-from transformers import PretrainedConfig
+from transformers import Dots1Config

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Dots1Config,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
@@ -174,7 +174,7 @@ def __init__(
hidden_size: int,
num_heads: int,
num_kv_heads: int,
-config: PretrainedConfig,
+config: Dots1Config,
rope_theta: float = 10000,
rope_scaling: Optional[dict[str, Any]] = None,
max_position_embeddings: int = 8192,
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Dots1Config,
prefix: str,
model_config: ModelConfig,
cache_config: Optional[CacheConfig] = None,
6 changes: 3 additions & 3 deletions vllm/model_executor/models/minimax_text_01.py
@@ -12,7 +12,7 @@
import torch.nn.functional as F
from einops import rearrange
from torch import nn
-from transformers.configuration_utils import PretrainedConfig
+from transformers import MiniMaxConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.config import CacheConfig, VllmConfig
@@ -585,7 +585,7 @@ class MiniMaxText01DecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: MiniMaxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
expert_num: int = 1,
@@ -788,7 +788,7 @@ class MiniMaxText01Model(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: MiniMaxConfig,
quant_config: Optional[QuantizationConfig] = None,
cache_config: Optional[CacheConfig] = None,
scheduler_config=None,
8 changes: 4 additions & 4 deletions vllm/model_executor/models/mpt.py
@@ -8,7 +8,7 @@

import torch
import torch.nn as nn
-from transformers import PretrainedConfig
+from transformers import MptConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -50,7 +50,7 @@ class MPTAttention(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: MptConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -144,7 +144,7 @@ class MPTMLP(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: MptConfig,
quant_config: Optional[QuantizationConfig] = None,
):
super().__init__()
@@ -176,7 +176,7 @@ class MPTBlock(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: MptConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
4 changes: 2 additions & 2 deletions vllm/model_executor/models/olmoe.py
@@ -19,7 +19,7 @@

import torch
from torch import nn
-from transformers import PretrainedConfig
+from transformers import OlmoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: OlmoeConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
6 changes: 3 additions & 3 deletions vllm/model_executor/models/qwen2_moe.py
@@ -30,7 +30,7 @@
import torch
import torch.nn.functional as F
from torch import nn
-from transformers import PretrainedConfig
+from transformers import Qwen2MoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -98,7 +98,7 @@ class Qwen2MoeSparseMoeBlock(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Qwen2MoeConfig,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
@@ -256,7 +256,7 @@ class Qwen2MoeDecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Qwen2MoeConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
6 changes: 3 additions & 3 deletions vllm/model_executor/models/qwen3_moe.py
@@ -27,7 +27,7 @@

import torch
from torch import nn
-from transformers import PretrainedConfig
+from transformers import Qwen3MoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -98,7 +98,7 @@ class Qwen3MoeSparseMoeBlock(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Qwen3MoeConfig,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
@@ -242,7 +242,7 @@ class Qwen3MoeDecoderLayer(nn.Module):

def __init__(
self,
-config: PretrainedConfig,
+config: Qwen3MoeConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",