Skip to content
8 changes: 4 additions & 4 deletions tests/entrypoints/test_omni_entrypoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,15 +184,15 @@ def fake_engine(*args: Any, **kwargs: Any) -> FakeAsyncOmniEngine:

parser = argparse.ArgumentParser()
parser.add_argument("--gpu-memory-utilization", type=float, default=0.9)
parser.add_argument("--hsdp-shard-size", type=int, default=-1)
parser.add_argument("--batch-timeout", type=int, default=10)
nullify_stage_engine_defaults(parser)
args = parser.parse_args([])
args.model = "fake-model"

Omni(**vars(args))

assert captured["gpu_memory_utilization"] is None
assert captured["hsdp_shard_size"] == -1
assert captured["batch_timeout"] == 10
assert "_cli_explicit_keys" not in captured


Expand Down Expand Up @@ -233,15 +233,15 @@ def fake_engine(*args: Any, **kwargs: Any) -> FakeAsyncOmniEngine:

parser = argparse.ArgumentParser()
parser.add_argument("--gpu-memory-utilization", type=float, default=0.9)
parser.add_argument("--hsdp-shard-size", type=int, default=-1)
parser.add_argument("--batch-timeout", type=int, default=10)
args = parser.parse_args([])
args.model = "fake-model"

with pytest.deprecated_call(match="from_cli_args"):
Omni.from_cli_args(args, parser=parser)

assert captured["gpu_memory_utilization"] is None
assert captured["hsdp_shard_size"] == -1
assert captured["batch_timeout"] == 10


def _make_base():
Expand Down
9 changes: 6 additions & 3 deletions tests/test_arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,16 +393,19 @@ def test_nullify_stage_engine_defaults_resets_inherited_defaults():
def test_non_override_flags_keep_real_defaults_after_nullify():
    """Check which argparse defaults survive ``nullify_stage_engine_defaults``.

    Registers three flags, runs the nullify pass over the parser, and then
    asserts that ``--hsdp-shard-size`` and ``--batch-timeout`` still carry
    their declared defaults while ``--max-num-seqs`` has been reset to
    ``None``.
    """
    import argparse

    from vllm_omni.config.stage_config import deploy_override_field_names
    from vllm_omni.engine.arg_utils import nullify_stage_engine_defaults

    def find_action(dest):
        # First registered action for ``dest``; raises StopIteration if absent.
        return next(action for action in parser._actions if action.dest == dest)

    parser = argparse.ArgumentParser()
    for flag, default, help_text in (
        ("--hsdp-shard-size", -1, "HSDP shard size."),
        ("--batch-timeout", 10, "Batch timeout."),
        ("--max-num-seqs", 64, "Max num seqs."),
    ):
        parser.add_argument(flag, type=int, default=default, help=help_text)
    nullify_stage_engine_defaults(parser)

    # NOTE(review): the -1 expectation below presumes hsdp_shard_size is not
    # a deploy override field -- confirm against deploy_override_field_names().
    hsdp_action = find_action("hsdp_shard_size")

    # Guard: the test is only meaningful while batch_timeout stays outside
    # the deploy override field set.
    assert "batch_timeout" not in deploy_override_field_names()

    batch_timeout_action = find_action("batch_timeout")
    max_num_seqs_action = find_action("max_num_seqs")

    assert hsdp_action.default == -1
    assert batch_timeout_action.default == 10
    assert max_num_seqs_action.default is None


Expand Down
223 changes: 215 additions & 8 deletions tests/test_config_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,199 @@ def test_to_omegaconf_omits_none_deploy_overrides_for_engine_args(self):
for name in deploy_override_field_names() - {"devices"}:
assert name not in engine_args

def test_to_omegaconf_diffusion_parallel_overrides_replace_nested_values(self):
    """Runtime overrides must replace values inside a YAML ``parallel_config``.

    A diffusion stage is built with a fully populated YAML
    ``parallel_config`` and a runtime override for every one of those keys.
    After ``to_omegaconf()`` each key must hold the override value under
    ``engine_args.parallel_config`` and must not appear directly on
    ``engine_args``.
    """
    yaml_parallel_config = {
        "pipeline_parallel_size": 1,
        "data_parallel_size": 1,
        "tensor_parallel_size": 4,
        "enable_expert_parallel": False,
        "ulysses_degree": 1,
        "ring_degree": 1,
        "ulysses_mode": "strict",
        "sequence_parallel_size": 1,
        "cfg_parallel_size": 1,
        "vae_patch_parallel_size": 1,
        "use_hsdp": False,
        "hsdp_shard_size": -1,
        "hsdp_replicate_size": 1,
    }
    expected_parallel_config = {
        "pipeline_parallel_size": 2,
        "data_parallel_size": 3,
        "tensor_parallel_size": 8,
        "enable_expert_parallel": True,
        "ulysses_degree": 2,
        "ring_degree": 4,
        "ulysses_mode": "advanced_uaa",
        "sequence_parallel_size": 8,
        "cfg_parallel_size": 2,
        "vae_patch_parallel_size": 2,
        "use_hsdp": True,
        "hsdp_shard_size": 8,
        "hsdp_replicate_size": 2,
    }
    stage = StageConfig(
        stage_id=1,
        model_stage="diffusion",
        stage_type=StageType.DIFFUSION,
        yaml_engine_args={"parallel_config": yaml_parallel_config},
        # Hand over a copy so the expectation table stays untouched.
        runtime_overrides=dict(expected_parallel_config),
    )

    omega_config = stage.to_omegaconf()
    engine_args = omega_config.engine_args
    parallel_config = engine_args.parallel_config

    # Every override lands inside the nested parallel_config ...
    for field_name, expected in expected_parallel_config.items():
        actual = getattr(parallel_config, field_name)
        if isinstance(expected, bool):
            # Boolean flags are checked by identity, not mere truthiness.
            assert actual is expected
        else:
            assert actual == expected

    # ... and none of them appears as a top-level engine arg.
    for field_name in expected_parallel_config:
        assert field_name not in engine_args

def test_to_omegaconf_diffusion_parallel_overrides_create_parallel_config(self):
    """Runtime overrides alone must produce a nested ``parallel_config``.

    The diffusion stage is built without any YAML engine args. After
    ``to_omegaconf()`` every parallel override must be readable under
    ``engine_args.parallel_config`` and absent from the top level of
    ``engine_args``.
    """
    expected_parallel_config = {
        "pipeline_parallel_size": 2,
        "data_parallel_size": 3,
        "tensor_parallel_size": 8,
        "enable_expert_parallel": True,
        "ulysses_degree": 2,
        "ring_degree": 4,
        "ulysses_mode": "advanced_uaa",
        "sequence_parallel_size": 8,
        "cfg_parallel_size": 2,
        "vae_patch_parallel_size": 2,
        "use_hsdp": True,
        "hsdp_shard_size": 8,
        "hsdp_replicate_size": 2,
    }
    stage = StageConfig(
        stage_id=1,
        model_stage="diffusion",
        stage_type=StageType.DIFFUSION,
        # Hand over a copy so the expectation table stays untouched.
        runtime_overrides=dict(expected_parallel_config),
    )

    omega_config = stage.to_omegaconf()
    engine_args = omega_config.engine_args
    parallel_config = engine_args.parallel_config

    # Values are first verified inside the nested parallel_config ...
    for field_name, expected in expected_parallel_config.items():
        actual = getattr(parallel_config, field_name)
        if isinstance(expected, bool):
            # Boolean flags are checked by identity, not mere truthiness.
            assert actual is expected
        else:
            assert actual == expected

    # ... then confirmed to be missing from the flat engine args.
    for field_name in expected_parallel_config:
        assert field_name not in engine_args

def test_to_omegaconf_diffusion_parallel_degree_overrides_recompute_sequence_parallel_size(self):
    """Overriding only the degrees must change ``sequence_parallel_size``.

    YAML sets ``sequence_parallel_size``, ``ulysses_degree`` and
    ``ring_degree`` to 1; the runtime overrides raise the two degrees to 2
    and 4 without mentioning ``sequence_parallel_size``. The resulting
    nested config is expected to report 8 for it (presumably the product
    of the two degrees), and none of the three keys may appear at the top
    level of ``engine_args``.
    """
    expected_parallel_config = {
        "ulysses_degree": 2,
        "ring_degree": 4,
        "sequence_parallel_size": 8,
    }
    stage = StageConfig(
        stage_id=1,
        model_stage="diffusion",
        stage_type=StageType.DIFFUSION,
        yaml_engine_args={
            "parallel_config": {
                "sequence_parallel_size": 1,
                "ulysses_degree": 1,
                "ring_degree": 1,
            }
        },
        runtime_overrides={"ulysses_degree": 2, "ring_degree": 4},
    )

    engine_args = stage.to_omegaconf().engine_args
    parallel_config = engine_args.parallel_config

    for field_name, expected in expected_parallel_config.items():
        assert getattr(parallel_config, field_name) == expected

    for field_name in expected_parallel_config:
        assert field_name not in engine_args

def test_to_omegaconf_diffusion_parallel_explicit_sequence_parallel_size_is_preserved(self):
    """An explicit ``sequence_parallel_size`` override must be kept as given.

    The runtime overrides set ``ulysses_degree=2``, ``ring_degree=4`` and an
    explicit ``sequence_parallel_size=16``. All three values must come back
    unchanged from ``engine_args.parallel_config``.
    """
    expected_parallel_config = {
        "ulysses_degree": 2,
        "ring_degree": 4,
        "sequence_parallel_size": 16,
    }
    stage = StageConfig(
        stage_id=1,
        model_stage="diffusion",
        stage_type=StageType.DIFFUSION,
        yaml_engine_args={
            "parallel_config": {
                "sequence_parallel_size": 1,
                "ulysses_degree": 1,
                "ring_degree": 1,
            }
        },
        # Hand over a copy so the expectation table stays untouched.
        runtime_overrides=dict(expected_parallel_config),
    )

    parallel_config = stage.to_omegaconf().engine_args.parallel_config

    for field_name, expected in expected_parallel_config.items():
        assert getattr(parallel_config, field_name) == expected

def test_to_omegaconf_llm_parallel_overrides_remain_top_level(self):
    """LLM-stage parallel overrides must stay flat on ``engine_args``.

    For an LLM stage the pipeline/data/tensor parallel sizes are expected
    directly on ``engine_args``, and no ``parallel_config`` key may be
    present there.
    """
    expected_engine_args = {
        "pipeline_parallel_size": 2,
        "data_parallel_size": 3,
        "tensor_parallel_size": 8,
    }
    stage = StageConfig(
        stage_id=0,
        model_stage="thinker",
        stage_type=StageType.LLM,
        # Hand over a copy so the expectation table stays untouched.
        runtime_overrides=dict(expected_engine_args),
    )

    engine_args = stage.to_omegaconf().engine_args

    for field_name, expected in expected_engine_args.items():
        assert getattr(engine_args, field_name) == expected

    for field_name in expected_engine_args:
        assert field_name in engine_args

    assert "parallel_config" not in engine_args


class TestModelPipeline:
"""Tests for ModelPipeline class."""
Expand Down Expand Up @@ -828,31 +1021,45 @@ class TestDeployConfigLoading:
def test_deploy_override_fields_include_deploy_schema_fields(self):
expected_fields = {
"async_chunk",
# StageDeployConfig: stage placement and runtime fields.
"devices",
# StageDeployConfig: vLLM EngineArgs fields.
"async_scheduling",
"compilation_config",
"config_format",
"data_parallel_size",
"devices",
"disable_hybrid_kv_cache_manager",
"distributed_executor_backend",
"dtype",
"enable_chunked_prefill",
"enable_flashinfer_autotune",
"enable_prefix_caching",
"enforce_eager",
"gpu_memory_utilization",
"load_format",
"max_model_len",
"max_num_batched_tokens",
"max_num_seqs",
"mm_processor_cache_gb",
"pipeline_parallel_size",
"profiler_config",
"quantization",
"skip_mm_profiling",
"subtalker_sampling_params",
"tensor_parallel_size",
"tokenizer_mode",
# StageDeployConfig: diffusion parallel_config deploy override fields.
"cfg_parallel_size",
"enable_expert_parallel",
"hsdp_replicate_size",
"hsdp_shard_size",
"ring_degree",
"sequence_parallel_size",
"ulysses_degree",
"ulysses_mode",
"use_hsdp",
"vae_patch_parallel_size",
# DeployConfig: pipeline-wide engine settings.
"data_parallel_size",
"distributed_executor_backend",
"dtype",
"enable_chunked_prefill",
"enable_prefix_caching",
"pipeline_parallel_size",
"quantization",
"trust_remote_code",
}

Expand Down
Loading
Loading