22 changes: 11 additions & 11 deletions vllm/model_executor/models/aimv2.py
@@ -8,7 +8,6 @@

import torch
import torch.nn as nn
from transformers import PretrainedConfig

from vllm.attention.layer import MultiHeadAttention
from vllm.distributed import get_tensor_model_parallel_world_size
@@ -21,12 +20,13 @@
from vllm.model_executor.layers.quantization.base_config import (
QuantizationConfig)
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.transformers_utils.configs.ovis import AIMv2Config


class AIMv2SwiGLUFFN(nn.Module):

def __init__(self, config: PretrainedConfig,
quant_config: QuantizationConfig, prefix: str):
def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
prefix: str):
super().__init__()
hidden_features = config.intermediate_size
in_features = config.hidden_size
@@ -57,7 +57,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

class AIMv2PatchEmbed(nn.Module):

def __init__(self, config: PretrainedConfig):
def __init__(self, config: AIMv2Config):
super().__init__()
self.proj = nn.Conv2d(
config.num_channels,
@@ -75,7 +75,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

class AIMv2ViTPreprocessor(nn.Module):

def __init__(self, config: PretrainedConfig):
def __init__(self, config: AIMv2Config):
super().__init__()
num_patches = (config.image_size // config.patch_size)**2

@@ -93,8 +93,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

class AIMv2Attention(nn.Module):

def __init__(self, config: PretrainedConfig,
quant_config: QuantizationConfig, prefix: str):
def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
prefix: str):
super().__init__()
self.config = config
self.embed_dim = config.hidden_size
@@ -141,8 +141,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:

class AIMv2Block(nn.Module):

def __init__(self, config: PretrainedConfig,
quant_config: QuantizationConfig, prefix: str):
def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
prefix: str):
super().__init__()
self.attn = AIMv2Attention(config,
quant_config=quant_config,
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: AIMv2Config,
quant_config: QuantizationConfig,
*,
require_post_norm: Optional[bool] = None,
@@ -193,7 +193,7 @@ def forward(self, tokens: torch.Tensor) -> torch.Tensor:
class AIMv2Model(torch.nn.Module):

def __init__(self,
config: PretrainedConfig,
config: AIMv2Config,
quant_config: QuantizationConfig,
*,
require_post_norm: Optional[bool] = None,
8 changes: 4 additions & 4 deletions vllm/model_executor/models/commandr.py
@@ -27,7 +27,7 @@

import torch
from torch import nn
from transformers import CohereConfig
from transformers import Cohere2Config, CohereConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -89,7 +89,7 @@ class CohereMLP(nn.Module):

def __init__(
self,
config: CohereConfig,
config: Union[CohereConfig, Cohere2Config],
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
@@ -124,7 +124,7 @@ class CohereAttention(nn.Module):

def __init__(
self,
config: CohereConfig,
config: Union[CohereConfig, Cohere2Config],
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -242,7 +242,7 @@ def forward(
class CohereDecoderLayer(nn.Module):

def __init__(self,
config: CohereConfig,
config: Union[CohereConfig, Cohere2Config],
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = ""):
14 changes: 7 additions & 7 deletions vllm/model_executor/models/dbrx.py
@@ -6,7 +6,7 @@

import torch
import torch.nn as nn
from transformers import PretrainedConfig
from transformers import DbrxConfig

from vllm.attention import Attention
from vllm.config import CacheConfig, VllmConfig
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
params_dtype: Optional[torch.dtype] = None,
):
super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
quant_config: Optional[QuantizationConfig] = None,
params_dtype: Optional[torch.dtype] = None,
prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
quant_config: Optional[QuantizationConfig] = None,
params_dtype: Optional[torch.dtype] = None,
prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: DbrxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
15 changes: 9 additions & 6 deletions vllm/model_executor/models/deepseek_v2.py
@@ -29,7 +29,7 @@

import torch
from torch import nn
from transformers import PretrainedConfig
from transformers import DeepseekV2Config, DeepseekV3Config

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Union[DeepseekV2Config, DeepseekV3Config],
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
enable_eplb: bool = False,
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Union[DeepseekV2Config, DeepseekV3Config],
hidden_size: int,
num_heads: int,
qk_nope_head_dim: int,
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Union[DeepseekV2Config, DeepseekV3Config],
hidden_size: int,
num_heads: int,
qk_nope_head_dim: int,
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Union[DeepseekV2Config, DeepseekV3Config],
prefix: str,
model_config: ModelConfig,
cache_config: Optional[CacheConfig] = None,
@@ -957,7 +957,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
pass


def get_spec_layer_idx_from_weight_name(config: PretrainedConfig,
# Compatibility with
# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
def get_spec_layer_idx_from_weight_name(config: Union[DeepseekV2Config,
DeepseekV3Config],
weight_name: str) -> Optional[int]:
if (hasattr(config, "num_nextn_predict_layers")
and config.num_nextn_predict_layers > 0):
8 changes: 4 additions & 4 deletions vllm/model_executor/models/dots1.py
@@ -29,7 +29,7 @@

import torch
from torch import nn
from transformers import PretrainedConfig
from transformers import Dots1Config

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Dots1Config,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
):
@@ -174,7 +174,7 @@ def __init__(
hidden_size: int,
num_heads: int,
num_kv_heads: int,
config: PretrainedConfig,
config: Dots1Config,
rope_theta: float = 10000,
rope_scaling: Optional[dict[str, Any]] = None,
max_position_embeddings: int = 8192,
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Dots1Config,
prefix: str,
model_config: ModelConfig,
cache_config: Optional[CacheConfig] = None,
6 changes: 3 additions & 3 deletions vllm/model_executor/models/exaone4.py
@@ -26,7 +26,7 @@

import torch
from torch import nn
from transformers import PretrainedConfig
from transformers import Exaone4Config

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Exaone4Config,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Exaone4Config,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
10 changes: 5 additions & 5 deletions vllm/model_executor/models/glm4_moe.py
@@ -28,7 +28,7 @@

import torch
from torch import nn
from transformers import PretrainedConfig
from transformers.models.glm4_moe import Glm4MoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -100,7 +100,7 @@ class Glm4MoE(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Glm4MoeConfig,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
enable_eplb: bool = False,
@@ -198,7 +198,7 @@ class Glm4MoeAttention(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Glm4MoeConfig,
hidden_size: int,
num_heads: int,
num_kv_heads: int,
@@ -297,7 +297,7 @@ class Glm4MoeDecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: Glm4MoeConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
@@ -683,7 +683,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
return self.model.get_expert_mapping()


def get_spec_layer_idx_from_weight_name(config: PretrainedConfig,
def get_spec_layer_idx_from_weight_name(config: Glm4MoeConfig,
weight_name: str) -> Optional[int]:
if hasattr(config,
"num_nextn_predict_layers") and (config.num_nextn_predict_layers
6 changes: 3 additions & 3 deletions vllm/model_executor/models/minimax_text_01.py
@@ -12,7 +12,7 @@
import torch.nn.functional as F
from einops import rearrange
from torch import nn
from transformers.configuration_utils import PretrainedConfig
from transformers import MiniMaxConfig

from vllm.attention import Attention, AttentionMetadata
from vllm.config import CacheConfig, VllmConfig
@@ -585,7 +585,7 @@ class MiniMaxText01DecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: MiniMaxConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
expert_num: int = 1,
@@ -788,7 +788,7 @@ class MiniMaxText01Model(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: MiniMaxConfig,
quant_config: Optional[QuantizationConfig] = None,
cache_config: Optional[CacheConfig] = None,
scheduler_config=None,
4 changes: 2 additions & 2 deletions vllm/model_executor/models/olmoe.py
@@ -19,7 +19,7 @@

import torch
from torch import nn
from transformers import PretrainedConfig
from transformers import OlmoeConfig

from vllm.attention import Attention
from vllm.compilation.decorators import support_torch_compile
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):

def __init__(
self,
config: PretrainedConfig,
config: OlmoeConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",