Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
94 commits
Select commit Hold shift + click to select a range
5195265
Cross layers implementation
liranschour Dec 4, 2025
0f36888
Fix linting
liranschour Dec 7, 2025
8d36b4b
Add cross layers compatibility check
liranschour Dec 10, 2025
2a20197
Move cross_layers logic into TpKVTopology
liranschour Dec 11, 2025
073b30e
Code review minor fix
liranschour Dec 11, 2025
b403a9e
Linting...
liranschour Dec 11, 2025
06d3184
Code review fixes
liranschour Dec 17, 2025
cd27866
Update vllm/distributed/kv_transfer/kv_connector/utils.py
liranschour Dec 22, 2025
19319af
Update vllm/distributed/kv_transfer/kv_connector/utils.py
liranschour Dec 22, 2025
994bf1d
Update vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
liranschour Dec 22, 2025
0efeba3
Update vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
liranschour Dec 22, 2025
ef8e7ad
Update vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
liranschour Dec 22, 2025
6e2b751
Code review fixes
liranschour Dec 22, 2025
5e66e8f
Code review fix
liranschour Dec 28, 2025
eaf5e3d
Code review fix
liranschour Dec 28, 2025
e85f458
Code review fix
liranschour Dec 28, 2025
fff0935
Merge remote-tracking branch 'vllm/main' into nixl_kv_cont_cross_layers
liranschour Dec 28, 2025
9bd9598
Code review fix
liranschour Dec 28, 2025
cd57ed8
Merge remote-tracking branch 'vllm/main' into nixl_kv_cont_cross_layers
liranschour Jan 13, 2026
f153e83
n/a
liranschour Jan 13, 2026
15f2a78
n/a
liranschour Jan 13, 2026
0cb1825
n/a
liranschour Jan 13, 2026
9630c8e
n/a
liranschour Jan 13, 2026
c148f6d
n/a
liranschour Jan 13, 2026
96329f6
n/a
liranschour Jan 13, 2026
0394b36
Merge remote-tracking branch 'vllm/main' into nixl_kv_cont_cross_layers
liranschour Jan 14, 2026
b4d7045
Unit test fix
liranschour Jan 15, 2026
52b1155
Unit test fix
liranschour Jan 15, 2026
e34db34
Unit test fix
liranschour Jan 15, 2026
edc0755
Unit test fix
liranschour Jan 15, 2026
03af3ec
n/a
liranschour Jan 15, 2026
f0f2cf9
n/a
liranschour Jan 15, 2026
c3e1e5e
Merge branch 'main' into nixl_kv_cont_cross_layers
liranschour Jan 18, 2026
701d4ef
Code review fix
liranschour Jan 18, 2026
e7df5f8
Code review fix
liranschour Jan 18, 2026
6dae9b5
Code review fix
liranschour Jan 19, 2026
012bb9e
Handle hetrogenous TP for FLASHINFER and TRITON
liranschour Jan 20, 2026
99b3401
n/a
liranschour Jan 20, 2026
9fe2eb6
n/a
liranschour Jan 20, 2026
043c4d8
n/a
liranschour Jan 20, 2026
fe7197c
Run cross layers only for FlashAttention and FLASHINFER
liranschour Jan 21, 2026
5d59ea6
Enhance test_register_kv_caches
liranschour Jan 21, 2026
392e5d5
Documentation
liranschour Jan 21, 2026
d0e9aed
Merge branch 'main' into nixl_kv_cont_cross_layers
liranschour Jan 21, 2026
3c6921f
Code review fix
liranschour Jan 21, 2026
ed3180c
Code review fix
liranschour Jan 21, 2026
ced9ad4
Code review fix
liranschour Jan 21, 2026
580dbc4
Code review fix
liranschour Jan 21, 2026
19fff29
Code review fix
liranschour Jan 21, 2026
dd97e99
n/a
liranschour Jan 21, 2026
fce2050
n/a
liranschour Jan 21, 2026
7161150
n/a
liranschour Jan 21, 2026
4d3890e
n/a
liranschour Jan 21, 2026
d92bf96
n/a
liranschour Jan 21, 2026
6991cdd
n/a
liranschour Jan 21, 2026
2791d34
n/a
liranschour Jan 21, 2026
ff1f244
n/a
liranschour Jan 21, 2026
92f2628
Add cross layers blocks to run_accuracy_tests.sh
liranschour Jan 21, 2026
4715ced
Code review fix
liranschour Jan 22, 2026
c2e0ca0
Code review fix
liranschour Jan 22, 2026
7d1df76
Merge branch 'main' into nixl_kv_cont_cross_layers
liranschour Jan 22, 2026
d9ad710
Minor fix
liranschour Jan 22, 2026
7f0d3b4
n/a
liranschour Jan 28, 2026
156359e
n/a
liranschour Jan 28, 2026
fceb3f9
n/a
liranschour Jan 28, 2026
9f3f8c4
n/a
liranschour Jan 28, 2026
79bf223
Merge remote-tracking branch 'vllm/main' into nixl_kv_merge
liranschour Jan 28, 2026
a19fc2c
n/a
liranschour Jan 28, 2026
eea914c
Calculate block_size_position
liranschour Jan 28, 2026
568e641
Enhance test_register_kv_caches() to test cross layers case
liranschour Jan 29, 2026
6096ce9
n/a
liranschour Jan 29, 2026
c993465
Merge branch 'main' into nixl_kv_cont_cross_layers
liranschour Jan 29, 2026
264c550
Code review fix
liranschour Jan 29, 2026
d14a487
Update vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
liranschour Feb 2, 2026
969c3a1
Code review fix
liranschour Feb 2, 2026
f0335d3
Update vllm/distributed/kv_transfer/kv_connector/utils.py
liranschour Feb 2, 2026
7f25bca
Code review fix
liranschour Feb 2, 2026
858f856
Added CI tests
liranschour Feb 2, 2026
ff7e8c7
n/a
liranschour Feb 2, 2026
08f1ac4
n/a
liranschour Feb 2, 2026
1c734a9
CI fix
liranschour Feb 2, 2026
e51a290
Code review fix
liranschour Feb 3, 2026
0ba9b05
Code review fix
liranschour Feb 3, 2026
60cf003
n/a
liranschour Feb 3, 2026
b95de28
n/a
liranschour Feb 3, 2026
6142cfb
n/a
liranschour Feb 3, 2026
f210774
n/a
liranschour Feb 3, 2026
e1832cb
n/a
liranschour Feb 3, 2026
b8252dd
n/a
liranschour Feb 3, 2026
bd9f1c4
n/a
liranschour Feb 3, 2026
4652655
n/a
liranschour Feb 3, 2026
16b3d06
Merge branch 'main' into nixl_kv_cont_cross_layers
liranschour Feb 3, 2026
fa0ee18
Merge branch 'main' into nixl_kv_cont_cross_layers
NickLucche Feb 4, 2026
ac8903f
retrigger tests
liranschour Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/features/nixl_connector_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,15 @@ Support use case: Prefill with 'HND' and decode with 'NHD' with experimental con
--kv-transfer-config '{..., "enable_permute_local_kv":"True"}'
```

### Cross layers blocks

By default, this feature is disabled. When enabled on an attention backend that supports it, each logical KV-cache block is laid out contiguously in physical memory across layers, which reduces the number of buffers that need to be transferred.
To enable this feature:

```bash
--kv-transfer-config '{..., "kv_connector_extra_config": {"enable_cross_layers_blocks": "True"}}'
```

## Example Scripts/Code

Refer to these example scripts in the vLLM repository:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ tp_configs=(
"GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=1 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny"
)
dp_ep_configs=(
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP2, D-DPEP=2 (TP=1)
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=1 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP1, D-DPEP=2 (TP=1)
"DP_EP=1 GPU_MEMORY_UTILIZATION=0.8 PREFILLER_TP_SIZE=2 DECODER_TP_SIZE=2 MODEL_NAMES=deepseek-ai/deepseek-vl2-tiny" # MLA+P-TP2, D-DPEP=2 (TP=1)
)

# Select config array based on DP_EP env var
Expand Down Expand Up @@ -57,3 +57,9 @@ if [[ -n "${FLASHINFER:-}" ]]; then
else
echo "FLASHINFER not set, skipping FLASHINFER runs."
fi

# Re-run the suite with cross-layers blocks when the CROSS_LAYERS_BLOCKS
# environment variable is set to any non-empty value.
if [[ -z "${CROSS_LAYERS_BLOCKS:-}" ]]; then
  : # cross-layers rerun not requested; nothing extra to do
else
  echo "CROSS_LAYERS_BLOCKS is set, rerunning with --enable-cross-layers"
  run_tests "default backend" "--enable-cross-layers"
fi
15 changes: 13 additions & 2 deletions tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set -xe
# Parse command line arguments
KV_BUFFER_DEVICE="cuda" # Default to cuda
ATTENTION_BACKEND="" # Default to empty (use vllm default)
CROSS_LAYERS_BLOCKS="False"
while [[ $# -gt 0 ]]; do
case $1 in
--kv_buffer_device)
Expand All @@ -14,6 +15,10 @@ while [[ $# -gt 0 ]]; do
ATTENTION_BACKEND="$2"
shift 2
;;
--enable-cross-layers)
CROSS_LAYERS_BLOCKS="True"
shift 1
;;
*)
echo "Unknown option $1"
echo "Usage: $0 [--kv_buffer_device <cuda|cpu>] [--attention-backend <backend>]"
Expand All @@ -34,11 +39,17 @@ else
KV_CONFIG_HETERO_LAYOUT=''
fi

# Optional JSON fragment appended to the kv-transfer-config: turns on
# cross-layers blocks when --enable-cross-layers was passed to this script.
KV_EXTRA_CONFIG=''
if [[ "${CROSS_LAYERS_BLOCKS}" == "True" ]]; then
  KV_EXTRA_CONFIG=',"kv_connector_extra_config":{"enable_cross_layers_blocks": "True"}'
fi

# Build the kv-transfer-config once
if [[ "$KV_BUFFER_DEVICE" == "cuda" ]]; then
KV_CONFIG='{"kv_connector":"NixlConnector","kv_role":"kv_both"'${KV_CONFIG_HETERO_LAYOUT}'}'
KV_CONFIG='{"kv_connector":"NixlConnector","kv_role":"kv_both"'${KV_CONFIG_HETERO_LAYOUT}${KV_EXTRA_CONFIG}'}'
else
KV_CONFIG="{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"$KV_BUFFER_DEVICE\""${KV_CONFIG_HETERO_LAYOUT}"}"
KV_CONFIG="{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"$KV_BUFFER_DEVICE\""${KV_CONFIG_HETERO_LAYOUT}${KV_EXTRA_CONFIG}"}"
fi

# Models to run
Expand Down
Loading