From 3d9b83db22fd3b27d99f1cfa0adddedf126dde45 Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Thu, 15 Jan 2026 14:58:28 +0800 Subject: [PATCH 1/7] implement PluggableLayer Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/layers/pluggable_layer.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vllm/model_executor/layers/pluggable_layer.py diff --git a/vllm/model_executor/layers/pluggable_layer.py b/vllm/model_executor/layers/pluggable_layer.py new file mode 100644 index 000000000000..62c460f6d297 --- /dev/null +++ b/vllm/model_executor/layers/pluggable_layer.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +import torch.nn as nn + +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +class PluggableLayer(nn.Module): + """ + Base class for pluggable layers. + + A PluggableLayer is a *module-composing* abstraction: it may instantiate other + ``torch.nn.Module`` objects as sub-layers, and its functionality depends on + these sub-layers following a generalized invocation sequence. Also, it is stateful + and may hold parameters or buffers. + + Unlike :class:`CustomOp`, PluggableLayer does NOT provide per-platform + ``forward_*`` dispatch. Instead, it supports out-of-tree (OOT) replacement + of the entire layer class at instantiation time, allowing customized + initialization and submodule composition. + """ + + def __new__(cls, *args, **kwargs): + try: + layer_class_name = cls.__name__ + except AttributeError: + raise TypeError( + f"Cannot instantiate '{cls.__name__}': its 'name' attribute " + f"was not set, possibly because it was not decorated with " + f"@PluggableLayer.register, or it's the PluggableLayer base class itself." + ) from None + + if layer_class_name not in cls.layer_registry_oot: + layer_cls_to_instantiate = cls + else: + layer_cls_to_instantiate = cls.layer_registry_oot[layer_class_name] + logger.debug( + "Instantiating pluggable layer: %s using %s", + layer_class_name, + str(layer_cls_to_instantiate), + ) + return super().__new__(layer_cls_to_instantiate) + + # Dictionary of all pluggable layers (classes, indexed by registered name). + layer_registry_oot: dict[str, type["PluggableLayer"]] = {} + + # Decorator to register out-of-tree(oot) pluggable layers. + # For OOT pluggable layers: + # if in-tree layer class is registered with an oot_custom_layer, + # the oot_custom_layer will be used instead. 
+ @classmethod + def register_oot(cls, _decorated_layer_cls=None, name: str | None = None): + def decorator(layer_cls): + reg_name = name if name is not None else cls.__name__ + assert reg_name not in cls.layer_registry_oot, ( + f"Duplicate layer name: {reg_name}" + ) + layer_cls.name = reg_name + cls.layer_registry_oot[reg_name] = layer_cls + return layer_cls + + if _decorated_layer_cls is None: + # Called with parentheses: @PluggableLayer.register_oot() + # or @PluggableLayer.register_oot(name="...") + return decorator + elif isinstance(_decorated_layer_cls, type): # Check if it's a class + # Called without parentheses: @PluggableLayer.register_oot + return decorator(_decorated_layer_cls) + else: + raise TypeError("Decorator can only be applied to classes.") From 0d76cd86f6599d0860ad18b72ca61b8a13ba8f5d Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Thu, 15 Jan 2026 16:55:32 +0800 Subject: [PATCH 2/7] move pluggable_layer back to custom op Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/custom_op.py | 64 ++++++++++++++++ vllm/model_executor/layers/pluggable_layer.py | 74 ------------------- 2 files changed, 64 insertions(+), 74 deletions(-) delete mode 100644 vllm/model_executor/layers/pluggable_layer.py diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py index 81ba544b4813..0db95381f7bd 100644 --- a/vllm/model_executor/custom_op.py +++ b/vllm/model_executor/custom_op.py @@ -11,6 +11,70 @@ logger = init_logger(__name__) +class PluggableLayer(nn.Module): + """ + Base class for pluggable layers. + + A PluggableLayer is a *module-composing* abstraction: it may instantiate other + ``torch.nn.Module`` objects as sub-layers, and its functionality depends on + these sub-layers following a generalized invocation sequence. Also, it is stateful + and may hold parameters or buffers. + + Unlike :class:`CustomOp`, PluggableLayer does NOT provide per-platform + ``forward_*`` dispatch. Instead, it supports out-of-tree (OOT) replacement + of the entire layer class at instantiation time, allowing customized + initialization and submodule composition. + """ + + def __new__(cls, *args, **kwargs): + try: + layer_class_name = cls.__name__ + except AttributeError: + raise TypeError( + f"Cannot instantiate '{cls.__name__}': its 'name' attribute " + f"was not set, possibly because it was not decorated with " + f"@PluggableLayer.register, or it's the PluggableLayer base class itself." + ) from None + + if layer_class_name not in cls.layer_registry_oot: + layer_cls_to_instantiate = cls + else: + layer_cls_to_instantiate = cls.layer_registry_oot[layer_class_name] + logger.debug( + "Instantiating pluggable layer: %s using %s", + layer_class_name, + str(layer_cls_to_instantiate), + ) + return super().__new__(layer_cls_to_instantiate) + + # Dictionary of all pluggable layers (classes, indexed by registered name). + layer_registry_oot: dict[str, type["PluggableLayer"]] = {} + + # Decorator to register out-of-tree(oot) pluggable layers. + # For OOT pluggable layers: + # if in-tree layer class is registered with an oot_custom_layer, + # the oot_custom_layer will be used instead. 
+ @classmethod + def register_oot(cls, _decorated_layer_cls=None, name: str | None = None): + def decorator(layer_cls): + reg_name = name if name is not None else cls.__name__ + assert reg_name not in cls.layer_registry_oot, ( + f"Duplicate layer name: {reg_name}" + ) + layer_cls.name = reg_name + cls.layer_registry_oot[reg_name] = layer_cls + return layer_cls + + if _decorated_layer_cls is None: + # Called with parentheses: @PluggableLayer.register_oot() + # or @PluggableLayer.register_oot(name="...") + return decorator + elif isinstance(_decorated_layer_cls, type): # Check if it's a class + # Called without parentheses: @PluggableLayer.register_oot + return decorator(_decorated_layer_cls) + else: + raise TypeError("Decorator can only be applied to classes.") + class CustomOp(nn.Module): """ Base class for custom ops. diff --git a/vllm/model_executor/layers/pluggable_layer.py b/vllm/model_executor/layers/pluggable_layer.py deleted file mode 100644 index 62c460f6d297..000000000000 --- a/vllm/model_executor/layers/pluggable_layer.py +++ /dev/null @@ -1,74 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - - -import torch.nn as nn - -from vllm.logger import init_logger - -logger = init_logger(__name__) - - -class PluggableLayer(nn.Module): - """ - Base class for pluggable layers. - - A PluggableLayer is a *module-composing* abstraction: it may instantiate other - ``torch.nn.Module`` objects as sub-layers, and its functionality depends on - these sub-layers following a generalized invocation sequence. Also, it is stateful - and may hold parameters or buffers. - - Unlike :class:`CustomOp`, PluggableLayer does NOT provide per-platform - ``forward_*`` dispatch. Instead, it supports out-of-tree (OOT) replacement - of the entire layer class at instantiation time, allowing customized - initialization and submodule composition. - """ - - def __new__(cls, *args, **kwargs): - try: - layer_class_name = cls.__name__ - except AttributeError: - raise TypeError( - f"Cannot instantiate '{cls.__name__}': its 'name' attribute " - f"was not set, possibly because it was not decorated with " - f"@PluggableLayer.register, or it's the PluggableLayer base class itself." - ) from None - - if layer_class_name not in cls.layer_registry_oot: - layer_cls_to_instantiate = cls - else: - layer_cls_to_instantiate = cls.layer_registry_oot[layer_class_name] - logger.debug( - "Instantiating pluggable layer: %s using %s", - layer_class_name, - str(layer_cls_to_instantiate), - ) - return super().__new__(layer_cls_to_instantiate) - - # Dictionary of all pluggable layers (classes, indexed by registered name). - layer_registry_oot: dict[str, type["PluggableLayer"]] = {} - - # Decorator to register out-of-tree(oot) pluggable layers. - # For OOT pluggable layers: - # if in-tree layer class is registered with an oot_custom_layer, - # the oot_custom_layer will be used instead. 
- @classmethod - def register_oot(cls, _decorated_layer_cls=None, name: str | None = None): - def decorator(layer_cls): - reg_name = name if name is not None else cls.__name__ - assert reg_name not in cls.layer_registry_oot, ( - f"Duplicate layer name: {reg_name}" - ) - layer_cls.name = reg_name - cls.layer_registry_oot[reg_name] = layer_cls - return layer_cls - - if _decorated_layer_cls is None: - # Called with parentheses: @PluggableLayer.register_oot() - # or @PluggableLayer.register_oot(name="...") - return decorator - elif isinstance(_decorated_layer_cls, type): # Check if it's a class - # Called without parentheses: @PluggableLayer.register_oot - return decorator(_decorated_layer_cls) - else: - raise TypeError("Decorator can only be applied to classes.") From d6674097b2e89258d699dd7b58bb51cdca2d3eae Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Thu, 15 Jan 2026 17:16:08 +0800 Subject: [PATCH 3/7] move out op_registry and op_registry_oot Signed-off-by: whx-sjtu <2952154980@qq.com> --- docs/design/custom_op.md | 9 ------- vllm/model_executor/custom_op.py | 43 +++++++++++++++----------------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/docs/design/custom_op.md b/docs/design/custom_op.md index 13c2915abe8f..3f4934b15699 100644 --- a/docs/design/custom_op.md +++ b/docs/design/custom_op.md @@ -8,15 +8,6 @@ This document will introduce how CustomOp works in vLLM and how to implement a n `CustomOp` manages two dictionaries of all custom ops (i.e., op classes, indexed by registered name) in its class, for vLLM and OOT plugins respectively. -??? code - - ```python - class CustomOp(nn.Module): - - op_registry: dict[str, type["CustomOp"]] = {} - op_registry_oot: dict[str, type["CustomOp"]] = {} - ``` - We can use `@CustomOp.register("op_name")` to register an op class to the `CustomOp` system. After this, the `op_name` and its class will be added into the `op_registry` dictionary. In addition, We can also register an OOT op by `@CustomOp.register_oot("op_name")`. We will introduce this mechanism in detail later. When a `CustomOp` is called (i.e., call its `forward()` method), if it is enabled (i.e., with `--compilation_config.custom_ops '["+op_name"]'`), it will automatically dispatch the forward method to the appropriate backend according to `current_platform`. Otherwise (i.e., it is disabled), it will only call the `forward_native()` method to use PyTorch-native implementation of this forward method. diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py index 0db95381f7bd..ecaa26c99835 100644 --- a/vllm/model_executor/custom_op.py +++ b/vllm/model_executor/custom_op.py @@ -11,6 +11,15 @@ logger = init_logger(__name__) +# Dictionary of all custom ops (classes, indexed by registered name). +# To check if an op with a name is enabled, call .enabled() on the class. +# Examples: +# - MyOp.enabled() +# - op_registry["my_op"].enabled() +op_registry: dict[str, type["CustomOp"]] = {} +op_registry_oot: dict[str, type["CustomOp"]] = {} + + class PluggableLayer(nn.Module): """ Base class for pluggable layers. @@ -36,10 +45,10 @@ def __new__(cls, *args, **kwargs): f"@PluggableLayer.register, or it's the PluggableLayer base class itself." 
) from None - if layer_class_name not in cls.layer_registry_oot: + if layer_class_name not in op_registry_oot: layer_cls_to_instantiate = cls else: - layer_cls_to_instantiate = cls.layer_registry_oot[layer_class_name] + layer_cls_to_instantiate = op_registry_oot[layer_class_name] logger.debug( "Instantiating pluggable layer: %s using %s", layer_class_name, @@ -47,9 +56,6 @@ def __new__(cls, *args, **kwargs): ) return super().__new__(layer_cls_to_instantiate) - # Dictionary of all pluggable layers (classes, indexed by registered name). - layer_registry_oot: dict[str, type["PluggableLayer"]] = {} - # Decorator to register out-of-tree(oot) pluggable layers. # For OOT pluggable layers: # if in-tree layer class is registered with an oot_custom_layer, @@ -58,11 +64,9 @@ def __new__(cls, *args, **kwargs): def register_oot(cls, _decorated_layer_cls=None, name: str | None = None): def decorator(layer_cls): reg_name = name if name is not None else cls.__name__ - assert reg_name not in cls.layer_registry_oot, ( - f"Duplicate layer name: {reg_name}" - ) + assert reg_name not in op_registry_oot, f"Duplicate layer name: {reg_name}" layer_cls.name = reg_name - cls.layer_registry_oot[reg_name] = layer_cls + op_registry_oot[reg_name] = layer_cls return layer_cls if _decorated_layer_cls is None: @@ -75,6 +79,7 @@ def decorator(layer_cls): else: raise TypeError("Decorator can only be applied to classes.") + class CustomOp(nn.Module): """ Base class for custom ops. @@ -91,10 +96,10 @@ def __new__(cls, *args, **kwargs): f"@CustomOp.register, or it's the CustomOp base class itself." ) from None - if op_name not in cls.op_registry_oot: + if op_name not in op_registry_oot: op_cls_to_instantiate = cls else: - op_cls_to_instantiate = cls.op_registry_oot[op_name] + op_cls_to_instantiate = op_registry_oot[op_name] logger.debug( "Instantiating custom op: %s using %s", op_name, @@ -214,21 +219,13 @@ def default_on() -> bool: return not count_none > 0 or count_all > 0 - # Dictionary of all custom ops (classes, indexed by registered name). - # To check if an op with a name is enabled, call .enabled() on the class. - # Examples: - # - MyOp.enabled() - # - op_registry["my_op"].enabled() - op_registry: dict[str, type["CustomOp"]] = {} - op_registry_oot: dict[str, type["CustomOp"]] = {} - # Decorator to register custom ops. 
@classmethod def register(cls, name: str): def decorator(op_cls): - assert name not in cls.op_registry, f"Duplicate op name: {name}" + assert name not in op_registry, f"Duplicate op name: {name}" op_cls.name = name - cls.op_registry[name] = op_cls + op_registry[name] = op_cls return op_cls return decorator @@ -246,9 +243,9 @@ def decorator(op_cls): def register_oot(cls, _decorated_op_cls=None, name: str | None = None): def decorator(op_cls): reg_name = name if name is not None else cls.__name__ - assert reg_name not in cls.op_registry_oot, f"Duplicate op name: {reg_name}" + assert reg_name not in op_registry_oot, f"Duplicate op name: {reg_name}" op_cls.name = reg_name - cls.op_registry_oot[reg_name] = op_cls + op_registry_oot[reg_name] = op_cls return op_cls if _decorated_op_cls is None: From 535d532a3ab2c373c77a646c61b46a0097032ab6 Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Thu, 15 Jan 2026 17:33:46 +0800 Subject: [PATCH 4/7] apply pluggalbe layer to mla Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/custom_op.py | 11 +++++++++++ vllm/model_executor/layers/mla.py | 19 ++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py index ecaa26c99835..bdd6759bfd4c 100644 --- a/vllm/model_executor/custom_op.py +++ b/vllm/model_executor/custom_op.py @@ -56,6 +56,17 @@ def __new__(cls, *args, **kwargs): ) return super().__new__(layer_cls_to_instantiate) + # Decorator to register pluggable layers. + @classmethod + def register(cls, name: str): + def decorator(op_cls): + assert name not in op_registry, f"Duplicate op name: {name}" + op_cls.name = name + op_registry[name] = op_cls + return op_cls + + return decorator + # Decorator to register out-of-tree(oot) pluggable layers. # For OOT pluggable layers: # if in-tree layer class is registered with an oot_custom_layer, diff --git a/vllm/model_executor/layers/mla.py b/vllm/model_executor/layers/mla.py index 65541d2a485a..2549f1221f36 100644 --- a/vllm/model_executor/layers/mla.py +++ b/vllm/model_executor/layers/mla.py @@ -6,7 +6,7 @@ from vllm.attention.layer import MLAAttention from vllm.config import CacheConfig -from vllm.model_executor.custom_op import CustomOp +from vllm.model_executor.custom_op import PluggableLayer from vllm.model_executor.layers.quantization import QuantizationConfig @@ -30,13 +30,13 @@ class MLAModules: # --8<-- [start:multi_head_latent_attention] -@CustomOp.register("multi_head_latent_attention") -class MultiHeadLatentAttentionWrapper(CustomOp): - """MLA layer registered as CustomOp to allow OOT backends to add +@PluggableLayer.register("multi_head_latent_attention") +class MultiHeadLatentAttentionWrapper(PluggableLayer): + """Pluggable MLA layer which allows OOT backends to add custom implementations of the outer MLA layer (including rope & o_proj). - Note that currently MLA ignores the enable/disable mechanism of CustomOp - because there is only one in-tree implementation in forward_native. - TODO: implement this with a new PluggableLayer mechanism. + Note that currently oot platforms can still use CustomOp.register_oot to + replace MLA layer entirly, although we use PluggableLayer to register + this layer now. This class takes positions and hidden_states as input. The input tensors can either contain prefill tokens or decode tokens. 
@@ -110,7 +110,7 @@ def __init__( self.prefix = prefix - def forward_native( + def forward( self, positions: torch.Tensor, hidden_states: torch.Tensor, @@ -174,6 +174,3 @@ def forward_native( ) return self.o_proj(attn_out)[0] - - def forward_cuda(self, *args, **kwargs): - return self.forward_native(*args, **kwargs) From e582d804c7ac4d8bdb79a951a1c6735e34d6fb17 Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Thu, 15 Jan 2026 17:43:38 +0800 Subject: [PATCH 5/7] fix lint Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/custom_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py index bdd6759bfd4c..a9b151aee2c9 100644 --- a/vllm/model_executor/custom_op.py +++ b/vllm/model_executor/custom_op.py @@ -42,7 +42,7 @@ def __new__(cls, *args, **kwargs): raise TypeError( f"Cannot instantiate '{cls.__name__}': its 'name' attribute " f"was not set, possibly because it was not decorated with " - f"@PluggableLayer.register, or it's the PluggableLayer base class itself." + f"@PluggableLayer.register, or it's the PluggableLayer itself." ) from None if layer_class_name not in op_registry_oot: From d22bc77141c40eb9c88541bb5ba672094321fdbf Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Tue, 20 Jan 2026 21:25:33 +0800 Subject: [PATCH 6/7] fix ci Signed-off-by: whx-sjtu <2952154980@qq.com> --- tests/model_executor/test_enabled_custom_ops.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py index 8ee1b1a37ca6..316caf06b29c 100644 --- a/tests/model_executor/test_enabled_custom_ops.py +++ b/tests/model_executor/test_enabled_custom_ops.py @@ -11,7 +11,7 @@ get_cached_compilation_config, set_current_vllm_config, ) -from vllm.model_executor.custom_op import CustomOp +from vllm.model_executor.custom_op import CustomOp, op_registry from vllm.model_executor.layers.activation import ( GeluAndMul, ReLUSquaredActivation, @@ -98,17 +98,17 @@ def test_enabled_ops( ops_enabled = [bool(x) for x in ops_enabled] assert RMSNorm(1024).enabled() == ops_enabled[0] - assert CustomOp.op_registry["rms_norm"].enabled() == ops_enabled[0] + assert op_registry["rms_norm"].enabled() == ops_enabled[0] assert SiluAndMul().enabled() == ops_enabled[1] - assert CustomOp.op_registry["silu_and_mul"].enabled() == ops_enabled[1] + assert op_registry["silu_and_mul"].enabled() == ops_enabled[1] assert GeluAndMul().enabled() == ops_enabled[2] - assert CustomOp.op_registry["gelu_and_mul"].enabled() == ops_enabled[2] + assert op_registry["gelu_and_mul"].enabled() == ops_enabled[2] # If registered, subclasses should follow their own name assert Relu3().enabled() == ops_enabled[3] - assert CustomOp.op_registry["relu3"].enabled() == ops_enabled[3] + assert op_registry["relu3"].enabled() == ops_enabled[3] # Unregistered subclass class SiluAndMul2(SiluAndMul): From c6cdb550b154dd00bf515b69e3a814e0eefc1b2b Mon Sep 17 00:00:00 2001 From: whx-sjtu <2952154980@qq.com> Date: Tue, 20 Jan 2026 22:07:42 +0800 Subject: [PATCH 7/7] fix typing Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/custom_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py index a9b151aee2c9..6fe252fa27ee 100644 --- a/vllm/model_executor/custom_op.py +++ b/vllm/model_executor/custom_op.py @@ -16,8 +16,8 @@ # Examples: # - 
MyOp.enabled() # - op_registry["my_op"].enabled() -op_registry: dict[str, type["CustomOp"]] = {} -op_registry_oot: dict[str, type["CustomOp"]] = {} +op_registry: dict[str, type["CustomOp"] | type["PluggableLayer"]] = {} +op_registry_oot: dict[str, type["CustomOp"] | type["PluggableLayer"]] = {} class PluggableLayer(nn.Module):
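
Usage sketch (illustrative only, not part of this series; `MyPlatformMLAWrapper` and its module are hypothetical names): an out-of-tree platform plugin can replace the MLA wrapper registered above by decorating its own class with `register_oot`. Once the plugin module is imported, the registration is keyed on the in-tree class name, so a later `MultiHeadLatentAttentionWrapper(...)` call constructs the registered OOT class instead, allowing customized initialization and submodule composition.

    # Hypothetical OOT plugin module; names below are illustrative, not part of vLLM.
    import torch

    from vllm.model_executor.layers.mla import MultiHeadLatentAttentionWrapper


    @MultiHeadLatentAttentionWrapper.register_oot
    class MyPlatformMLAWrapper(MultiHeadLatentAttentionWrapper):
        """Hypothetical replacement registered under the in-tree class name."""

        def forward(
            self,
            positions: torch.Tensor,
            hidden_states: torch.Tensor,
        ) -> torch.Tensor:
            # Platform-specific pre/post-processing would go here; delegate the
            # actual attention computation to the in-tree implementation.
            return super().forward(positions, hidden_states)

Subclassing the in-tree wrapper is the conservative choice for such a plugin: it reuses the existing `__init__` and weight layout, and `PluggableLayer.__new__` only swaps which class gets instantiated.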