Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/models/supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ th {
| `WanPipeline` | Wan2.1-T2V, Wan2.2-T2V, Wan2.2-TI2V | `Wan-AI/Wan2.1-T2V-1.3B-Diffusers`, `Wan-AI/Wan2.1-T2V-14B-Diffusers`, `Wan-AI/Wan2.2-T2V-A14B-Diffusers`, `Wan-AI/Wan2.2-TI2V-5B-Diffusers` | ✅︎ | ✅︎ | ✅︎ | ✅︎ |
| `WanImageToVideoPipeline` | Wan2.2-I2V | `Wan-AI/Wan2.2-I2V-A14B-Diffusers` | ✅︎ | ✅︎ | ✅︎ | ✅︎ |
| `Wan22VACEPipeline` | Wan2.1-VACE | `Wan-AI/Wan2.1-VACE-1.3B-diffusers`, `Wan-AI/Wan2.1-VACE-14B-diffusers` | ✅︎ | ✅︎ | ✅︎ | ✅︎ |
| `AniSoraI2VCogVideoXPipeline` | AniSora-I2V (5B) | `Disty0/Index-anisora-5B-diffusers` | ✅︎ | ✅︎ | | |
| `AniSoraV2I2VPipeline` | AniSora-I2V (14B) | `aardsoul-music/Wan2.1-Anisora-14B` | ✅︎ | ✅︎ | | |
| `LTX2Pipeline` | LTX-2-T2V | `Lightricks/LTX-2` | ✅︎ | ✅︎ | | |
| `LTX2ImageToVideoPipeline` | LTX-2-I2V | `Lightricks/LTX-2` | ✅︎ | ✅︎ | | |
| `LTX2TwoStagesPipeline` | LTX-2-T2V | `rootonchair/LTX-2-19b-distilled` | ✅︎ | ✅︎ | | |
Expand Down
4 changes: 3 additions & 1 deletion docs/user_guide/diffusion_features.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,13 @@ The following tables show which models support each feature:
| **Helios** | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
| **HunyuanVideo-1.5 T2V I2V** | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ (decode) | ✅ | ❌ |
| **DreamID-Omni** | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
| **AniSora V1 (5B)** | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ |
| **AniSora V2 (14B)** | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ |

**Frame Interpolation Support**

- **Supported**: Wan2.2 text-to-video, image-to-video, and TI2V pipelines
- **Not supported**: Wan2.1-VACE, LTX-2, LTX-2.3, Helios, HunyuanVideo-1.5, DreamID-Omni
- **Not supported**: Wan2.1-VACE, LTX-2, LTX-2.3, Helios, HunyuanVideo-1.5, DreamID-Omni, AniSora

### AudioGen

Expand Down
157 changes: 157 additions & 0 deletions tests/e2e/offline_inference/test_anisora_i2v.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
E2E offline inference tests for Index-AniSora I2V models.

Copy link
Copy Markdown
Collaborator

@wtomin wtomin Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To check functionality, we prioritize the online serving test script over the offline inference script. If your test cases overlap between the two test scripts, I recommend keeping the test case (e.g., tp=2) in the online serving test script and deleting it from the offline inference test script. This prevents duplicated test cases.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed test_anisora_v1_offline_tp2 and test_anisora_v2_offline_tp2 from the offline test file. TP=2 lifecycle coverage is now maintained only in test_anisora_online.py via test_anisora_v1_online_tp2_create_poll_download_delete as recommended.

Covers:
- V1 (5B, CogVideoX-based): AniSoraI2VCogVideoXPipeline
- V2 (14B, Wan2.1-based): AniSoraV2I2VPipeline
- V1 single GPU, V1 with SP=2 (Ulysses sequence parallelism, requires 2 GPUs), and V1 with FP8 quantization
"""

import gc
import os
import sys
from pathlib import Path

import PIL.Image
import pytest
import torch

# ruff: noqa: E402
REPO_ROOT = Path(__file__).resolve().parents[3]
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))

from tests.utils import hardware_test
from vllm_omni import Omni
from vllm_omni.diffusion.data import DiffusionParallelConfig
from vllm_omni.inputs.data import OmniDiffusionSamplingParams
from vllm_omni.outputs import OmniRequestOutput
from vllm_omni.platforms import current_omni_platform

# Set at import time, before any Omni engine is constructed in this module.
# NOTE(review): presumably forces worker processes to be spawned rather than
# forked (fork is unsafe once CUDA is initialized) — confirm against vLLM docs.
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

# Model identifiers passed to Omni(model=...).
MODEL_V1 = "Disty0/Index-anisora-5B-diffusers"
# NOTE(review): MODEL_V2 is not referenced by any test visible in this section.
MODEL_V2 = "aardsoul-music/Wan2.1-Anisora-14B"

# V1: vae_scale_factor_temporal=4 → num_frames % 4 == 1, e.g. 5, 9, 13 ...
# V2: same constraint (Wan2.1 VAE)
NUM_FRAMES = 5  # smallest value above 1 that satisfies num_frames % 4 == 1
HEIGHT = 480  # used for both the dummy input image and the requested output
WIDTH = 720  # used for both the dummy input image and the requested output
SEED = 42  # seeds the torch.Generator handed to the sampling params


def _dummy_image() -> PIL.Image.Image:
    """Build the solid-color RGB conditioning image used by the I2V tests.

    The image is ``WIDTH`` x ``HEIGHT`` pixels, i.e. the same resolution the
    tests request in their sampling params.
    """
    size = (WIDTH, HEIGHT)
    fill = (100, 149, 237)
    return PIL.Image.new("RGB", size, color=fill)


def _assert_video_output(output, num_frames: int, height: int, width: int) -> None:
    """Check that *output* carries a video tensor of the expected size.

    *output* is either an ``OmniRequestOutput`` (in which case the frames are
    taken from ``output.request_output.images[0]``) or the frames object
    itself. The frames must expose ``.shape`` laid out as
    (batch, num_frames, height, width, channels).

    The frame count is a lower bound rather than an exact match because the
    pipeline may round ``num_frames`` up for VAE temporal alignment.
    """
    assert output is not None

    # Default to treating the argument as the frames; unwrap if it is the
    # request-level wrapper.
    frames = output
    if isinstance(output, OmniRequestOutput):
        assert output.final_output_type == "image"
        assert output.request_output is not None
        frames = output.request_output.images[0]

    assert frames is not None
    assert hasattr(frames, "shape"), f"Expected tensor, got {type(frames)}"

    dims = frames.shape
    assert dims[1] >= num_frames, f"Expected >= {num_frames} frames, got {dims[1]}"
    assert dims[2] == height
    assert dims[3] == width


def _cleanup(model):
"""Shut down model workers and free GPU memory between tests."""
model.shutdown()
del model
gc.collect()
torch.cuda.empty_cache()


# ---------------------------------------------------------------------------
# V1 (5B CogVideoX) — single GPU
# ---------------------------------------------------------------------------


@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100", "rocm": "MI325"})
def test_anisora_v1_offline_single_gpu():
    """V1 (5B) offline inference on a single GPU.

    Runs a two-step image-to-video generation from a dummy image and checks
    the shape of the returned frames.
    """
    model = Omni(model=MODEL_V1)
    # Shut the model down even if generation or an assertion fails; otherwise
    # its workers and GPU memory leak into the tests that run afterwards.
    try:
        image = _dummy_image()
        outputs = model.generate(
            {"prompt": "a cat sitting calmly", "multi_modal_data": {"image": image}},
            OmniDiffusionSamplingParams(
                height=HEIGHT,
                width=WIDTH,
                num_frames=NUM_FRAMES,
                num_inference_steps=2,
                guidance_scale=6.0,
                generator=torch.Generator(current_omni_platform.device_type).manual_seed(SEED),
            ),
        )
        # generate() may hand back a list of outputs or a single output.
        result = outputs[0] if isinstance(outputs, list) else outputs
        _assert_video_output(result, NUM_FRAMES, HEIGHT, WIDTH)
    finally:
        _cleanup(model)


# ---------------------------------------------------------------------------
# V1 (5B CogVideoX) — SP=2 (Ulysses)
# ---------------------------------------------------------------------------


@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100", "rocm": "MI325"}, num_cards=2)
def test_anisora_v1_offline_sp2():
    """V1 (5B) offline inference with sequence_parallel_size=2 (Ulysses).

    Runs a two-step image-to-video generation from a dummy image across two
    cards and checks the shape of the returned frames.
    """
    model = Omni(
        model=MODEL_V1,
        parallel_config=DiffusionParallelConfig(sequence_parallel_size=2, ulysses_degree=2),
    )
    # Shut the model down even if generation or an assertion fails; otherwise
    # its workers and GPU memory leak into the tests that run afterwards.
    try:
        image = _dummy_image()
        outputs = model.generate(
            {"prompt": "a cat sitting calmly", "multi_modal_data": {"image": image}},
            OmniDiffusionSamplingParams(
                height=HEIGHT,
                width=WIDTH,
                num_frames=NUM_FRAMES,
                num_inference_steps=2,
                guidance_scale=6.0,
                generator=torch.Generator(current_omni_platform.device_type).manual_seed(SEED),
            ),
        )
        # generate() may hand back a list of outputs or a single output.
        result = outputs[0] if isinstance(outputs, list) else outputs
        _assert_video_output(result, NUM_FRAMES, HEIGHT, WIDTH)
    finally:
        _cleanup(model)


# ---------------------------------------------------------------------------
# V1 (5B CogVideoX) — FP8 quantization, single GPU
# ---------------------------------------------------------------------------


@pytest.mark.diffusion
@hardware_test(res={"cuda": "H100", "rocm": "MI325"})
def test_anisora_v1_offline_fp8():
    """V1 (5B) offline inference with FP8 quantization (W8A8).

    Runs a two-step image-to-video generation from a dummy image with
    ``quantization="fp8"`` and checks the shape of the returned frames.
    """
    model = Omni(model=MODEL_V1, quantization="fp8")
    # Shut the model down even if generation or an assertion fails; otherwise
    # its workers and GPU memory leak into the tests that run afterwards.
    try:
        image = _dummy_image()
        outputs = model.generate(
            {"prompt": "a cat sitting calmly", "multi_modal_data": {"image": image}},
            OmniDiffusionSamplingParams(
                height=HEIGHT,
                width=WIDTH,
                num_frames=NUM_FRAMES,
                num_inference_steps=2,
                guidance_scale=6.0,
                generator=torch.Generator(current_omni_platform.device_type).manual_seed(SEED),
            ),
        )
        # generate() may hand back a list of outputs or a single output.
        result = outputs[0] if isinstance(outputs, list) else outputs
        _assert_video_output(result, NUM_FRAMES, HEIGHT, WIDTH)
    finally:
        _cleanup(model)
Loading
Loading