From 3f2999cc11c5b6860d8efc20b7834ebce6f54ce0 Mon Sep 17 00:00:00 2001 From: hanzheli Date: Sat, 18 Apr 2026 07:57:25 +0000 Subject: [PATCH 1/3] [Feat] support HSDP for LTX-2 Signed-off-by: hanzheli --- docs/user_guide/diffusion_features.md | 2 +- .../examples/online_serving/text_to_video.md | 8 +++++++ tests/diffusion/models/ltx2/test_ltx2_hsdp.py | 22 +++++++++++++++++++ .../diffusion/models/ltx2/ltx2_transformer.py | 2 ++ 4 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/diffusion/models/ltx2/test_ltx2_hsdp.py diff --git a/docs/user_guide/diffusion_features.md b/docs/user_guide/diffusion_features.md index d70fdd9df7e..003f3bae353 100644 --- a/docs/user_guide/diffusion_features.md +++ b/docs/user_guide/diffusion_features.md @@ -140,7 +140,7 @@ The following tables show which models support each feature: |-------|:----------:|:-----------:|:---------------------:|:--------------:|:-----------------:|:------:|:------------------------:|:--------------------:|:--------------:|:----------------:| | **Wan2.2** | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (encode/decode) | ❌ | ❌ | | **Wan2.1-VACE** | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ❌ | ❌ | -| **LTX-2** | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| **LTX-2** | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | | **Helios** | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | | **HunyuanVideo-1.5 T2V I2V** | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ✅ | ❌ | | **DreamID-Omni** | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | diff --git a/docs/user_guide/examples/online_serving/text_to_video.md b/docs/user_guide/examples/online_serving/text_to_video.md index 00a9c167239..b918aac19d0 100644 --- a/docs/user_guide/examples/online_serving/text_to_video.md +++ b/docs/user_guide/examples/online_serving/text_to_video.md @@ -288,6 +288,14 @@ vllm serve Lightricks/LTX-2 --omni --port 8098 \ --enforce-eager --flow-shift 1.0 --boundary-ratio 1.0 ``` +For multi-GPU memory reduction, you can enable HSDP: + +```bash +vllm serve Lightricks/LTX-2 --omni --port 8098 \ + --enforce-eager --flow-shift 1.0 --boundary-ratio 1.0 \ + --use-hsdp --hsdp-shard-size 2 +``` + #### Start with Optimization Presets Use the LTX-2 startup script with built-in optimization presets: diff --git a/tests/diffusion/models/ltx2/test_ltx2_hsdp.py b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py new file mode 100644 index 00000000000..8b6d2fb15ec --- /dev/null +++ b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py @@ -0,0 +1,22 @@ +import torch.nn as nn + +from vllm_omni.diffusion.models.ltx2.ltx2_transformer import LTX2VideoTransformer3DModel + + +def test_ltx2_exposes_hsdp_shard_conditions_for_transformer_blocks(): + model = object.__new__(LTX2VideoTransformer3DModel) + nn.Module.__init__(model) + model.transformer_blocks = nn.ModuleList([nn.Linear(4, 4) for _ in range(2)]) + model.norm_out = nn.LayerNorm(4) + + conditions = getattr(model, "_hsdp_shard_conditions", None) + + assert conditions is not None + assert len(conditions) == 1 + + matched = [] + for name, module in model.named_modules(): + if any(cond(name, module) for cond in conditions): + matched.append(name) + + assert matched == ["transformer_blocks.0", "transformer_blocks.1"] diff --git a/vllm_omni/diffusion/models/ltx2/ltx2_transformer.py b/vllm_omni/diffusion/models/ltx2/ltx2_transformer.py index a1bf7f7809c..1840b5708b2 100644 --- a/vllm_omni/diffusion/models/ltx2/ltx2_transformer.py +++ b/vllm_omni/diffusion/models/ltx2/ltx2_transformer.py @@ -41,6 +41,7 @@ from vllm_omni.diffusion.attention.backends.abstract import AttentionMetadata from vllm_omni.diffusion.attention.layer import Attention +from vllm_omni.diffusion.distributed.hsdp_utils import is_transformer_block_module from vllm_omni.diffusion.distributed.sp_plan import SequenceParallelInput, SequenceParallelOutput from vllm_omni.diffusion.forward_context import get_forward_context, is_forward_context_available @@ -1264,6 +1265,7 @@ class LTX2VideoTransformer3DModel(nn.Module): _supports_gradient_checkpointing = True _skip_layerwise_casting_patterns = ["norm"] _repeated_blocks = ["LTX2VideoTransformerBlock"] + _hsdp_shard_conditions = [is_transformer_block_module] _sp_plan: dict[str, Any] | None = None @staticmethod From a134eebf98c25a31e7f97914ed284bf8fe9f53e4 Mon Sep 17 00:00:00 2001 From: hanzheli Date: Sat, 18 Apr 2026 11:15:40 +0000 Subject: [PATCH 2/3] Add pytest markers for LTX2 model tests Signed-off-by: hanzheli --- tests/diffusion/models/ltx2/test_ltx2_hsdp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/diffusion/models/ltx2/test_ltx2_hsdp.py b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py index 8b6d2fb15ec..70e09c62cfe 100644 --- a/tests/diffusion/models/ltx2/test_ltx2_hsdp.py +++ b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py @@ -1,7 +1,9 @@ +import pytest import torch.nn as nn from vllm_omni.diffusion.models.ltx2.ltx2_transformer import LTX2VideoTransformer3DModel +pytestmark = [pytest.mark.core_model, pytest.mark.cpu] def test_ltx2_exposes_hsdp_shard_conditions_for_transformer_blocks(): model = object.__new__(LTX2VideoTransformer3DModel) From ac8c06c7008afbaa39164191b1f93c0770a65639 Mon Sep 17 00:00:00 2001 From: hanzheli Date: Sat, 18 Apr 2026 11:29:26 +0000 Subject: [PATCH 3/3] fix: apply ruff format Signed-off-by: hanzheli --- tests/diffusion/models/ltx2/test_ltx2_hsdp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/diffusion/models/ltx2/test_ltx2_hsdp.py b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py index 70e09c62cfe..4dd07e1bf82 100644 --- a/tests/diffusion/models/ltx2/test_ltx2_hsdp.py +++ b/tests/diffusion/models/ltx2/test_ltx2_hsdp.py @@ -5,6 +5,7 @@ pytestmark = [pytest.mark.core_model, pytest.mark.cpu] + def test_ltx2_exposes_hsdp_shard_conditions_for_transformer_blocks(): model = object.__new__(LTX2VideoTransformer3DModel) nn.Module.__init__(model)