diff --git a/docs/design/custom_op.md b/docs/design/custom_op.md index a62d033072b1..17a57159147e 100644 --- a/docs/design/custom_op.md +++ b/docs/design/custom_op.md @@ -51,11 +51,8 @@ For example: **1. Attention:** ```python ---8<-- "vllm/model_executor/layers/attention/mm_encoder_attention.py:mm_encoder_attn" - --8<-- "vllm/model_executor/layers/mla.py:multi_head_latent_attention" ---8<-- "vllm/model_executor/models/deepencoder.py:rel_pos_attention" ``` **2. Activation:** @@ -170,6 +167,16 @@ For example: --8<-- "vllm/model_executor/layers/rotary_embedding/common.py:apply_rotary_emb" ``` +**12. Encoder:** + +```python +--8<-- "vllm/model_executor/models/deepencoder2.py:qwen2_decoder" + +--8<-- "vllm/model_executor/layers/attention/mm_encoder_attention.py:mm_encoder_attn" + +--8<-- "vllm/model_executor/models/deepencoder.py:rel_pos_attention" +``` + ## Guidelines for Implementing a New CustomOp ### Implement a New CustomOp in vLLM diff --git a/vllm/model_executor/models/deepencoder2.py b/vllm/model_executor/models/deepencoder2.py index f134249ebfbe..fdec155d5345 100644 --- a/vllm/model_executor/models/deepencoder2.py +++ b/vllm/model_executor/models/deepencoder2.py @@ -14,14 +14,20 @@ import torch.nn as nn import transformers +from vllm.model_executor.custom_op import PluggableLayer -class CustomQwen2Decoder(nn.Module): + +# --8<-- [start:qwen2_decoder] +@PluggableLayer.register("qwen2_decoder") +class CustomQwen2Decoder(PluggableLayer): """ Qwen2 visual encoder non-causal attention + causal attention token_type_ids :0=non-causal, 1=causal """ + # --8<-- [end:qwen2_decoder] + def __init__( self, decoder_layer: int = 24,