Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions vllm/config/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,11 +582,8 @@ def __post_init__(self):
)
if self.speculative_config.disable_padded_drafter_batch:
raise ValueError(
"async scheduling for EAGLE/MTP kind of speculative "
"decoding is enabled, but disable_padded_drafter_batch=True "
"disable_padded_drafter_batch=True is not supported for "
"this situation now. please set "
"disable_padded_drafter_batch=Fasle"
"Async scheduling is not compatible with "
"disable_padded_drafter_batch=True."
)
if not executor_supports_async_sched:
raise ValueError(
Expand All @@ -597,32 +594,40 @@ def __post_init__(self):
elif self.scheduler_config.async_scheduling is None:
# Enable async scheduling unless there is an incompatible option.
if self.parallel_config.pipeline_parallel_size > 1:
logger.warning(
logger.warning_once(
"Async scheduling is not yet supported with "
"pipeline_parallel_size > 1 and will be disabled."
"pipeline_parallel_size > 1 and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False
elif self.speculative_config is not None:
if self.speculative_config.method not in get_args(EagleModelTypes):
logger.warning(
"Async scheduling not supported with %s-based "
"speculative decoding and will be disabled.",
self.speculative_config.method,
)
else:
logger.warning(
"Async scheduling will be disabled because some features do "
"not currently work in conjunction with speculative decoding. "
"To use async scheduling with spec decoding anyway, "
"enable it explicitly via async_scheduling=True."
)
elif (
self.speculative_config is not None
and self.speculative_config.method not in get_args(EagleModelTypes)
):
logger.warning_once(
"Async scheduling not supported with %s-based "
"speculative decoding and will be disabled.",
self.speculative_config.method,
scope="local",
)
self.scheduler_config.async_scheduling = False
elif (
self.speculative_config is not None
and self.speculative_config.disable_padded_drafter_batch
):
logger.warning_once(
"Async scheduling is not compatible with "
"disable_padded_drafter_batch=True and will be disabled.",
scope="local",
)
self.scheduler_config.async_scheduling = False
elif not executor_supports_async_sched:
logger.warning(
logger.warning_once(
"Async scheduling will be disabled because it is not supported "
"with the `%s` distributed executor backend (only `mp`, `uni`, and "
"`external_launcher` are supported).",
executor_backend,
scope="local",
)
self.scheduler_config.async_scheduling = False
else:
Expand Down