Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
85 commits
Select commit Hold shift + click to select a range
affd919
added parser for moe detection with test
morrison-turnansky Oct 14, 2025
86cbc77
Set up -O infrastrucutre
adabeyta Oct 14, 2025
b82140e
name change and removed editing backedn in _apply_optimization_level …
morrison-turnansky Oct 14, 2025
ea16053
updated defaults for each pass config
morrison-turnansky Oct 15, 2025
887f9aa
set cuda graph mode defaults
morrison-turnansky Oct 15, 2025
2abcafe
added skelaton for non model specifc settings, and test to veriy that…
morrison-turnansky Oct 15, 2025
4778dc0
made is_model_moe inaccessible from user
morrison-turnansky Oct 15, 2025
51b7e8e
added parsing function to determine if model is quantized
morrison-turnansky Oct 15, 2025
2a37bfb
added model specific optimizations
morrison-turnansky Oct 15, 2025
3300079
updated default config design
morrison-turnansky Oct 15, 2025
560dccf
added vllm config default test
morrison-turnansky Oct 16, 2025
f5ce445
Update vllm/config/vllm.py
morrison-turnansky Oct 16, 2025
1378181
reviewer comments
morrison-turnansky Oct 16, 2025
55a9085
moved functions to optimization.py
morrison-turnansky Oct 16, 2025
5ecaff2
Add lambda-based callable defaults to vLLM config
adabeyta Oct 16, 2025
904ab7e
added quant_fp8 and rms_norm to defaults
morrison-turnansky Oct 16, 2025
5057dca
added optimization level to engine args
morrison-turnansky Oct 16, 2025
77183b3
reviewer comments
morrison-turnansky Oct 16, 2025
cd498f2
typos
morrison-turnansky Oct 16, 2025
939d035
added warning
morrison-turnansky Oct 16, 2025
e240cd7
Add lambda-based callable defaults to vLLM config
adabeyta Oct 16, 2025
af5b48b
Update vllm/engine/arg_utils.py
morrison-turnansky Oct 16, 2025
1229504
Update vllm/config/model.py
morrison-turnansky Oct 16, 2025
a414df5
reviewer comments
morrison-turnansky Oct 16, 2025
a57a5d9
Update vllm/config/vllm.py
morrison-turnansky Oct 17, 2025
f1529a7
Update vllm/config/model.py
morrison-turnansky Oct 17, 2025
cd65961
linting
morrison-turnansky Oct 17, 2025
cdaa51d
Add tests for explicit config overrides and fix OptimizationLevel import
adabeyta Oct 17, 2025
d4c6b88
Fix pre-commit issues
adabeyta Oct 17, 2025
60b2e54
added test cases for model parsing functions
morrison-turnansky Oct 17, 2025
929a88f
changed behavior of -0num and -0 num to map to optimziation level
morrison-turnansky Oct 29, 2025
28df435
Update vllm/config/vllm.py
morrison-turnansky Oct 29, 2025
89fb5bf
changed | None = None to Field(default = None)
morrison-turnansky Oct 29, 2025
7de833b
added disclamier for configs about invalid states
morrison-turnansky Oct 29, 2025
a7c86fc
fixed parse arges tests
morrison-turnansky Oct 29, 2025
1ca27ea
doc fix
morrison-turnansky Oct 30, 2025
7ee1a5b
Update vllm/config/compilation.py
morrison-turnansky Oct 30, 2025
23cd230
reviwer comments
morrison-turnansky Oct 30, 2025
ec6d6de
Make resolve config take only VllmConfig
adabeyta Oct 30, 2025
f112b72
reviewer comments 2
morrison-turnansky Oct 31, 2025
55834f9
added lambda in build_default
morrison-turnansky Oct 31, 2025
a8788ea
reviewer comments
morrison-turnansky Oct 31, 2025
c4ed382
removed debugging statements
morrison-turnansky Oct 31, 2025
7e474ed
added RedHatAI/Llama-3.1-8B-Instruct-NVFP4 test case
morrison-turnansky Oct 31, 2025
629b9d1
merge
morrison-turnansky Nov 3, 2025
a37d6f1
Update vllm/config/vllm.py
morrison-turnansky Nov 7, 2025
af3c0ae
Update vllm/config/vllm.py
morrison-turnansky Nov 7, 2025
d516761
Update vllm/config/vllm.py
morrison-turnansky Nov 7, 2025
bf5e6c8
reviwer comments
morrison-turnansky Nov 7, 2025
6ceb6c0
moved optimization levels to global config
morrison-turnansky Nov 7, 2025
e3912df
merge
morrison-turnansky Nov 12, 2025
e3d205c
spelling
morrison-turnansky Nov 12, 2025
739abbb
Update vllm/config/vllm.py
morrison-turnansky Nov 12, 2025
3079f2e
Update vllm/config/vllm.py
morrison-turnansky Nov 12, 2025
f579455
Update tests/engine/test_arg_utils.py
morrison-turnansky Nov 12, 2025
3d2fd25
Update tests/engine/test_arg_utils.py
morrison-turnansky Nov 12, 2025
457357a
Update tests/engine/test_arg_utils.py
morrison-turnansky Nov 12, 2025
8fd2c07
reviewer comments
morrison-turnansky Nov 12, 2025
dba0848
fixed failing tests
morrison-turnansky Nov 12, 2025
e8d5839
Update tests/test_config.py
morrison-turnansky Nov 12, 2025
c7968a5
Update vllm/config/vllm.py
morrison-turnansky Nov 12, 2025
41adf1f
reverted change to test_vllm_config_defaults
morrison-turnansky Nov 12, 2025
83cebd5
merge
morrison-turnansky Nov 17, 2025
771107b
updated _apply_optimization_level_defaults to recurse any fields in s…
morrison-turnansky Nov 17, 2025
150d2bf
reviewer comments
morrison-turnansky Nov 18, 2025
2369185
reviewer comments
morrison-turnansky Nov 18, 2025
3002fa2
reviewer comments
morrison-turnansky Nov 20, 2025
b44255f
added optimization level docs
morrison-turnansky Nov 20, 2025
4025836
reviewer comments
morrison-turnansky Nov 20, 2025
158dfa6
support LLM(optimization_level), updated docs for workflow
morrison-turnansky Nov 24, 2025
7783009
Merge branch 'main' into issue-20283-model-config
morrison-turnansky Nov 24, 2025
128f5cc
linting
morrison-turnansky Nov 24, 2025
120e100
fixed tests/model_executor/test_enabled_custom_ops.py
morrison-turnansky Nov 24, 2025
81e6c81
Merge branch 'main' into issue-20283-model-config
morrison-turnansky Nov 25, 2025
ed754d5
ci failures
morrison-turnansky Nov 25, 2025
2e609ac
merge
morrison-turnansky Nov 25, 2025
6287f47
fixed ci failure locally without changing behaviour
morrison-turnansky Nov 25, 2025
5805d6a
merge
morrison-turnansky Nov 25, 2025
0a272da
fixed cacheing of set_current_vllm_config
morrison-turnansky Nov 25, 2025
a54f084
ci
morrison-turnansky Nov 26, 2025
2323a3d
Merge branch 'main' into issue-20283-model-config
morrison-turnansky Nov 26, 2025
20d1645
moved cache changes locally to test
morrison-turnansky Nov 26, 2025
3c65d2e
Update vllm/config/vllm.py
morrison-turnansky Nov 26, 2025
5fa2370
removed optimzation_level downgrade
morrison-turnansky Nov 26, 2025
e54a161
cleaned up cudagraph mode incompatibility log
morrison-turnansky Nov 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions docs/design/optimization_levels.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<!-- markdownlint-disable -->

# Optimization Levels

## Overview

vLLM now supports optimization levels (`-O0`, `-O1`, `-O2`, `-O3`). Optimization levels provide an intuitive mechanism for users to trade startup time for performance. Higher levels have better performance but worse startup time. These optimization levels have associated defaults to help users get desired out-of-the-box performance. Importantly, defaults set by optimization levels are purely defaults; explicit user settings will not be overwritten.

## Level Summaries and Usage Examples

#### `-O0`: No Optimizations
- **Startup**: Fastest startup time
- **Use case**: Debugging and quick iteration
```bash
# CLI usage
python -m vllm.entrypoints.api_server --model RedHatAI/Llama-3.2-1B-FP8 -O0

# Python API usage
from vllm.entrypoints.llm import LLM

llm = LLM(
model="RedHatAI/Llama-3.2-1B-FP8",
optimization_level=0
)
```

#### `-O1`: Quick Optimizations
- **Startup**: Moderate startup time
- **Performance**: Inductor compilation, CUDAGraphMode.PIECEWISE
- **Use case**: Balance for most development scenarios

```bash
# CLI usage
python -m vllm.entrypoints.api_server --model RedHatAI/Llama-3.2-1B-FP8 -O1

# Python API usage
from vllm.entrypoints.llm import LLM

llm = LLM(
model="RedHatAI/Llama-3.2-1B-FP8",
optimization_level=1
)
```

#### `-O2`: Full Optimizations (Default)
- **Startup**: Longer startup time
- **Performance**: `-O1` + CUDAGraphMode.FULL_AND_PIECEWISE
- **Use case**: Production workloads where performance is important. This is the default use case. It is also very similar to the previous default. The primary difference is that noop & fusion flags are enabled.

```bash
# CLI usage (default, so optional)
python -m vllm.entrypoints.api_server --model RedHatAI/Llama-3.2-1B-FP8 -O2

# Python API usage
from vllm.entrypoints.llm import LLM

llm = LLM(
model="RedHatAI/Llama-3.2-1B-FP8",
optimization_level=2 # This is the default
)
```

#### `-O3`: Maximum Optimizations (In Development)
Still in development. The infrastructure was added now to avoid changing the
API in a future release. Currently behaves the same as `-O2`.

## Troubleshooting

### Common Issues

1. **Startup Time Too Long**: Use `-O0` or `-O1` for faster startup
2. **Compilation Errors**: Use `debug_dump_path` for additional debugging information
3. **Performance Issues**: Ensure using `-O2` for production
4 changes: 2 additions & 2 deletions tests/compile/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ def test_splitting_ops_dynamic():
config = VllmConfig()
# Default V1 config leaves cudagraph mode unset; splitting ops are only
# populated when the engine decides to use piecewise compilation.
assert config.compilation_config.cudagraph_mode == CUDAGraphMode.NONE
assert not config.compilation_config.splitting_ops_contain_attention()
assert config.compilation_config.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE
assert config.compilation_config.splitting_ops_contain_attention()

# When use_inductor_graph_partition=True
config = VllmConfig(
Expand Down
57 changes: 41 additions & 16 deletions tests/engine/test_arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,28 +222,53 @@ def test_media_io_kwargs_parser(arg, expected):
assert args.media_io_kwargs == expected


def test_compilation_config():
@pytest.mark.parametrize(
    ("args", "expected"),
    [
        (["-O", "1"], "1"),
        (["-O", "2"], "2"),
        (["-O", "3"], "3"),
        (["-O0"], "0"),
        (["-O1"], "1"),
        (["-O2"], "2"),
        (["-O3"], "3"),
    ],
)
def test_optimization_level(args, expected):
    """
    Test that both space-separated (``-O 1``) and attached (``-O1``)
    optimization-level flags map to ``optimization_level``.

    The level is stored as a string on ``optimization_level``, not on the
    compilation config: ``compilation_config.mode`` must stay ``None`` so
    level-based defaults can be resolved later without clobbering an
    explicit user setting.
    """
    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
    parsed_args = parser.parse_args(args)
    assert parsed_args.optimization_level == expected
    assert parsed_args.compilation_config.mode is None

# default value
args = parser.parse_args([])
assert args.compilation_config == CompilationConfig()

# set to O3
args = parser.parse_args(["-O0"])
assert args.compilation_config.mode == 0
@pytest.mark.parametrize(
    ("args", "expected"),
    [([f"-O.mode={mode}"], mode) for mode in range(4)],
)
def test_mode_parser(args, expected):
    """Check that ``-O.mode=<int>`` flags are parsed into
    ``compilation_config.mode`` as integers."""
    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
    namespace = parser.parse_args(args)
    assert namespace.compilation_config.mode == expected

# set to O 3 (space)
args = parser.parse_args(["-O", "1"])
assert args.compilation_config.mode == 1

# set to O 3 (equals)
args = parser.parse_args(["-O=2"])
assert args.compilation_config.mode == 2
def test_compilation_config():
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())

# set to O.mode 3
args = parser.parse_args(["-O.mode", "3"])
assert args.compilation_config.mode == 3
# default value
args = parser.parse_args([])
assert args.compilation_config == CompilationConfig()

# set to string form of a dict
args = parser.parse_args(
Expand Down
8 changes: 7 additions & 1 deletion tests/model_executor/test_enabled_custom_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
import torch

from vllm._aiter_ops import rocm_aiter_ops
from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config
from vllm.config import (
CompilationConfig,
VllmConfig,
get_cached_compilation_config,
set_current_vllm_config,
)
from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.layers.activation import (
GeluAndMul,
Expand Down Expand Up @@ -86,6 +91,7 @@ def test_enabled_ops(
backend=backend, mode=compilation_mode, custom_ops=custom_ops
)
)
get_cached_compilation_config.cache_clear()
with set_current_vllm_config(vllm_config):
assert CustomOp.default_on() == default_on

Expand Down
Loading