Merged

Changes from 73 commits (80 commits total)

Commits
5650b95
Merge pull request #1 from vllm-project/main
sroy745 May 29, 2024
8f36146
Merge branch 'vllm-project:main' into main
sroy745 Jun 3, 2024
9e75057
Merge branch 'vllm-project:main' into main
sroy745 Jun 3, 2024
db2c679
Merge branch 'vllm-project:main' into main
sroy745 Jun 7, 2024
8d7512c
Merge branch 'vllm-project:main' into main
sroy745 Jun 10, 2024
1473f74
Merge branch 'vllm-project:main' into main
sroy745 Jun 12, 2024
4013e1a
Merge branch 'vllm-project:main' into main
sroy745 Jun 14, 2024
2dbdd78
Merge branch 'vllm-project:main' into main
sroy745 Jun 17, 2024
b3575e9
Merge branch 'vllm-project:main' into main
sroy745 Jun 20, 2024
94b0d43
Merge branch 'vllm-project:main' into main
sroy745 Jun 24, 2024
fa8fedf
Merge branch 'vllm-project:main' into main
sroy745 Jun 27, 2024
6ed96b4
Merge branch 'vllm-project:main' into main
sroy745 Jun 27, 2024
b71c533
Merge branch 'vllm-project:main' into main
sroy745 Jun 28, 2024
57babef
Merge branch 'vllm-project:main' into main
sroy745 Jun 29, 2024
4b19bac
Merge branch 'vllm-project:main' into main
sroy745 Jul 1, 2024
eb7a1c4
Merge branch 'vllm-project:main' into main
sroy745 Jul 6, 2024
7e2c87e
Merge branch 'vllm-project:main' into main
sroy745 Jul 10, 2024
6212d5f
Merge branch 'vllm-project:main' into main
sroy745 Jul 15, 2024
5491438
Merge branch 'vllm-project:main' into main
sroy745 Jul 17, 2024
68e080a
Merge branch 'vllm-project:main' into main
sroy745 Jul 31, 2024
55e4332
Merge branch 'vllm-project:main' into main
sroy745 Aug 13, 2024
532eb48
Merge branch 'vllm-project:main' into main
sroy745 Aug 22, 2024
7cea056
Merge branch 'vllm-project:main' into main
sroy745 Aug 22, 2024
185e056
Merge branch 'vllm-project:main' into main
sroy745 Aug 24, 2024
e2be95f
Merge branch 'vllm-project:main' into main
sroy745 Aug 27, 2024
2ed5473
Merge branch 'vllm-project:main' into main
sroy745 Aug 28, 2024
efa4714
Merge branch 'vllm-project:main' into main
sroy745 Aug 29, 2024
fb87d34
Merge branch 'vllm-project:main' into main
sroy745 Aug 29, 2024
5419e49
Merge branch 'vllm-project:main' into main
sroy745 Aug 31, 2024
9ba12f8
Merge branch 'vllm-project:main' into main
sroy745 Sep 2, 2024
25cef3d
Merge branch 'vllm-project:main' into main
sroy745 Sep 3, 2024
9d4cd09
Merge branch 'vllm-project:main' into main
sroy745 Sep 4, 2024
c48cacb
Merge branch 'vllm-project:main' into main
sroy745 Sep 5, 2024
c42c399
Merge branch 'vllm-project:main' into main
sroy745 Sep 7, 2024
3d13e43
Merge branch 'vllm-project:main' into main
sroy745 Sep 9, 2024
7479775
Merge branch 'vllm-project:main' into main
sroy745 Sep 11, 2024
df9b966
Merge branch 'vllm-project:main' into main
sroy745 Sep 17, 2024
9a7ed92
Merge branch 'vllm-project:main' into main
sroy745 Sep 17, 2024
118e838
Merge branch 'vllm-project:main' into main
sroy745 Sep 19, 2024
e640c69
Merge branch 'vllm-project:main' into main
sroy745 Sep 20, 2024
89fb6cd
Merge branch 'vllm-project:main' into main
sroy745 Sep 23, 2024
5d886cc
Merge branch 'vllm-project:main' into main
sroy745 Sep 24, 2024
56f2065
Merge branch 'vllm-project:main' into main
sroy745 Sep 24, 2024
28e103e
Merge branch 'vllm-project:main' into main
sroy745 Sep 25, 2024
2fc1490
Merge branch 'vllm-project:main' into main
sroy745 Sep 25, 2024
8805750
Merge branch 'vllm-project:main' into main
sroy745 Sep 26, 2024
b30e5af
Merge branch 'vllm-project:main' into main
sroy745 Sep 28, 2024
92322f1
Merge branch 'vllm-project:main' into main
sroy745 Sep 30, 2024
85e9001
Merge branch 'vllm-project:main' into main
sroy745 Oct 1, 2024
cd4ff89
Merge branch 'vllm-project:main' into main
sroy745 Oct 1, 2024
0dd96ed
Merge branch 'vllm-project:main' into main
sroy745 Oct 1, 2024
9d4d969
Merge branch 'vllm-project:main' into main
sroy745 Oct 3, 2024
7d223b5
Merge branch 'vllm-project:main' into main
sroy745 Oct 5, 2024
f327d91
Merge branch 'vllm-project:main' into main
sroy745 Oct 5, 2024
b5adf28
Merge branch 'vllm-project:main' into main
sroy745 Oct 6, 2024
caf0d12
Merge branch 'vllm-project:main' into main
sroy745 Oct 7, 2024
28e77b1
Merge branch 'vllm-project:main' into main
sroy745 Oct 8, 2024
21b9a98
Add an environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 wh…
sroy745 Oct 8, 2024
d5b2966
Setting the pytest args for some more test
sroy745 Oct 8, 2024
ddd7ec7
Fix tests
sroy745 Oct 8, 2024
1c0105a
Fix buildkite
sroy745 Oct 8, 2024
a9c9e0e
Fix buildkite
sroy745 Oct 8, 2024
7357e56
Fix
sroy745 Oct 8, 2024
39da78e
Fix benchmarks
sroy745 Oct 8, 2024
2ac9e8d
Fix chunked prefill test
sroy745 Oct 8, 2024
e90a9f7
Fix tests
sroy745 Oct 8, 2024
8fef1c8
Fix test comments
sroy745 Oct 8, 2024
2ccd1a0
Fix test comment
sroy745 Oct 8, 2024
db7e46d
Merge branch 'vllm-project:main' into main
sroy745 Oct 9, 2024
dc472dd
Merge remote-tracking branch 'origin/main' into sroy-deprecate-blk-mgr-1
sroy745 Oct 9, 2024
114321a
Dummy
sroy745 Oct 9, 2024
2d4abb7
Format
sroy745 Oct 9, 2024
11903b3
Merge branch 'main' into sroy-deprecate-blk-mgr-1
DarkLight1337 Oct 9, 2024
534fd3f
Define function
sroy745 Oct 9, 2024
d3abe6a
Merge branch 'sroy-deprecate-blk-mgr-1' of https://github.com/sroy745…
sroy745 Oct 10, 2024
7f9002b
set auto to true
sroy745 Oct 10, 2024
9356c1e
Set auto
sroy745 Oct 10, 2024
c515030
Fix buildkite ignore
sroy745 Oct 10, 2024
4e2bb9c
Changes to buildkite for Distributed Tests (2 GPUs)
sroy745 Oct 10, 2024
2364e71
Another attempt to fix Distributed Tests (2 GPUs)
sroy745 Oct 10, 2024
16 changes: 11 additions & 5 deletions .buildkite/test-pipeline.yaml
@@ -77,8 +77,8 @@ steps:
- vllm/
- tests/basic_correctness/test_chunked_prefill
commands:
- VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
- VLLM_ATTENTION_BACKEND=XFORMERS VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s basic_correctness/test_chunked_prefill.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s basic_correctness/test_chunked_prefill.py

- label: Core Test # 10min
mirror_hardwares: [amd]
@@ -88,7 +88,11 @@ steps:
- vllm/distributed
- tests/core
commands:
- pytest -v -s core
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/test_scheduler.py
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/test_chunked_prefill_scheduler.py
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/block/e2e/test_correctness.py
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s core/block/e2e/test_correctness_sliding_window.py
- pytest -v -s core --ignore=core/test_scheduler.py --ignore=core/test_chunked_prefill_scheduler.py --ignore=core/block/e2e/test_correctness.py --ignore=core/block/e2e/test_correctness_sliding_window.py

- label: Entrypoints Test # 40min
working_dir: "/vllm-workspace/tests"
@@ -185,7 +189,8 @@ steps:
- vllm/
- tests/prefix_caching
commands:
- pytest -v -s prefix_caching
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s prefix_caching/test_prefix_caching.py
- pytest -v -s prefix_caching --ignore=prefix_caching/test_prefix_caching.py

- label: Samplers Test # 36min
source_file_dependencies:
@@ -209,7 +214,8 @@ steps:
- tests/spec_decode
commands:
- pytest -v -s spec_decode/e2e/test_multistep_correctness.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
- VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest -v -s spec_decode/e2e/test_compatibility.py
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py --ignore=spec_decode/e2e/test_compatibility.py

- label: LoRA Test %N # 15min each
mirror_hardwares: [amd]
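The pipeline gating above can be reproduced locally by exporting the variable before invoking pytest. A minimal Python sketch, assuming the command is run from the tests/ working directory used by CI and using core/test_scheduler.py purely as an example target:

# Minimal sketch: run one of the gated suites locally with the
# deprecation override enabled. The test path is illustrative.
import os
import subprocess

env = dict(os.environ, VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1="1")
subprocess.run(
    ["pytest", "-v", "-s", "core/test_scheduler.py"],
    env=env,
    check=True,  # raise CalledProcessError if the suite fails
)
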
4 changes: 3 additions & 1 deletion benchmarks/benchmark_latency.py
@@ -221,7 +221,9 @@ def run_to_completion(profile_dir: Optional[str] = None):
parser.add_argument("--enable-prefix-caching",
action='store_true',
help="Enable automatic prefix caching")
parser.add_argument('--use-v2-block-manager', action='store_true')
parser.add_argument('--use-v2-block-manager',
action='store_true',
default=EngineArgs.use_v2_block_manager)
parser.add_argument(
"--ray-workers-use-nsight",
action='store_true',
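Pointing the argparse default at EngineArgs.use_v2_block_manager keeps the benchmark flag in sync with the engine's own default rather than hard-coding False. A self-contained sketch of the pattern; StubEngineArgs is a stand-in for illustration, not vLLM's real EngineArgs:

# Sketch of the "default follows the engine" pattern. StubEngineArgs is a
# made-up stand-in for vllm.engine.arg_utils.EngineArgs.
import argparse
from dataclasses import dataclass


@dataclass
class StubEngineArgs:
    use_v2_block_manager: bool = False  # assumed engine-side default


parser = argparse.ArgumentParser()
parser.add_argument("--use-v2-block-manager",
                    action="store_true",
                    default=StubEngineArgs.use_v2_block_manager)
args = parser.parse_args([])
# If the engine-side default ever changes, the benchmark flag's default
# follows automatically, with no edit to this argparse declaration.
print(args.use_v2_block_manager)
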
2 changes: 2 additions & 0 deletions benchmarks/benchmark_prefix_caching.py
@@ -33,6 +33,7 @@
from transformers import PreTrainedTokenizerBase

from vllm import LLM, SamplingParams
from vllm.engine.arg_utils import EngineArgs
from vllm.utils import FlexibleArgumentParser

try:
@@ -177,6 +178,7 @@ def main(args):
help='enable prefix caching')
parser.add_argument('--use-v2-block-manager',
action='store_true',
default=EngineArgs.use_v2_block_manager,
help='Use BlockSpaceManagerV2')
parser.add_argument('--num-prompts',
type=int,
1 change: 1 addition & 0 deletions benchmarks/benchmark_throughput.py
@@ -473,6 +473,7 @@ def main(args: argparse.Namespace):
help="Maximum number of forward steps per scheduler call.")
parser.add_argument("--use-v2-block-manager",
action='store_true',
default=EngineArgs.use_v2_block_manager,
help="Enable block manager v2.")
parser.add_argument(
"--enable-prefix-caching",
12 changes: 12 additions & 0 deletions tests/basic_correctness/test_chunked_prefill.py
@@ -11,6 +11,8 @@

import pytest

import vllm.envs as envs

from ..models.utils import check_logprobs_close, check_outputs_equal
from ..utils import multi_gpu_test

@@ -20,6 +22,16 @@
]


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/basic_correctness/test_chunked_prefill.py`" #noqa
)


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [32])
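Because the fixture above is declared with autouse=False, the check only runs for tests that request it by name. A sketch of how a test might opt in; the test name and body are illustrative and not part of this change:

# Illustrative only: opting into the module-scoped
# check_deprecated_block_manager fixture defined above.
def test_v1_block_manager_path(check_deprecated_block_manager):
    # At this point the fixture has already asserted that
    # VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 is set, so the body can
    # safely exercise BlockSpaceManagerV1-specific behaviour.
    assert True
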
11 changes: 11 additions & 0 deletions tests/core/block/e2e/test_correctness.py
@@ -2,11 +2,22 @@

import pytest

import vllm.envs as envs
from vllm import SamplingParams

from .conftest import get_token_ids_from_llm_generator


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/core/block/e2e/test_correctness.py`" #noqa
)


@pytest.mark.parametrize(
"common_llm_kwargs",
[{
11 changes: 11 additions & 0 deletions tests/core/block/e2e/test_correctness_sliding_window.py
@@ -3,6 +3,7 @@

import pytest

import vllm.envs as envs
from vllm import LLM, SamplingParams

from .conftest import get_text_from_llm_generator
@@ -12,6 +13,16 @@
BLOCK_SIZE = 16


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/core/block/e2e/test_correctness_sliding_window.py`" #noqa
)


@pytest.mark.parametrize(
"common_llm_kwargs",
[{
11 changes: 11 additions & 0 deletions tests/core/test_chunked_prefill_scheduler.py
@@ -3,6 +3,7 @@

import pytest # noqa

import vllm.envs as envs
from vllm.config import CacheConfig, SchedulerConfig
from vllm.core.interfaces import AllocStatus
from vllm.core.scheduler import Scheduler
@@ -27,6 +28,16 @@ def schedule_and_update_computed_tokens(scheduler):
return metas, out


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/core/test_chunked_prefill_scheduler.py`" #noqa
)


@pytest.mark.parametrize('use_v2_block_manager', [True, False])
def test_simple(use_v2_block_manager: bool):
"""Verify basic scheduling works."""
11 changes: 11 additions & 0 deletions tests/core/test_scheduler.py
@@ -6,6 +6,7 @@
import pytest
from torch import Use # noqa

import vllm.envs as envs
from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig
from vllm.core.interfaces import AllocStatus
from vllm.core.scheduler import Scheduler, SchedulingBudget
@@ -17,6 +18,16 @@
schedule_and_update_computed_tokens)


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/core/test_scheduler.py`" #noqa
)


@pytest.mark.parametrize('use_v2_block_manager', [True, False])
def test_scheduler_add_seq_group(use_v2_block_manager: bool):
block_size = 4
11 changes: 11 additions & 0 deletions tests/prefix_caching/test_prefix_caching.py
@@ -6,6 +6,7 @@

import pytest

import vllm.envs as envs
from tests.kernels.utils import override_backend_env_variable
from vllm.block import PhysicalTokenBlock
from vllm.core.block_manager_v1 import CachedBlockAllocator
@@ -18,6 +19,16 @@
]


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/prefix_caching/test_prefix_caching.py`" #noqa
)


@pytest.mark.parametrize("block_size", [16])
@pytest.mark.parametrize("num_blocks", [16])
def test_block_allocator(
11 changes: 11 additions & 0 deletions tests/spec_decode/e2e/test_compatibility.py
@@ -1,10 +1,21 @@
import pytest

import vllm.envs as envs
from vllm import SamplingParams

from .conftest import get_output_from_llm_generator


@pytest.fixture(scope="module", autouse=False)
def check_deprecated_block_manager():
assert envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is True, (
"To allow the use of deprecated BlockSpaceManagerV1, set the "
"environment variable VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1. "
"You can run the tests with: "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 pytest tests/spec_decode/e2e/test_compatibility.py`" #noqa
)


@pytest.mark.parametrize(
"common_llm_kwargs",
[{
12 changes: 12 additions & 0 deletions vllm/config.py
@@ -1037,6 +1037,18 @@ def _verify_args(self) -> None:
f"({self.num_scheduler_steps}) must be greater than or "
"equal to 1.")

if (not self.use_v2_block_manager
        and not envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1):
raise ValueError(
"The use of BlockSpaceManagerV1 is deprecated and will "
"be removed in a future release. Please switch to "
"BlockSpaceManagerV2 by setting --use-v2-block-manager to "
"True. If you wish to suppress this error temporarily, "
"you can set the environment variable "
"`VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1`. If your use "
"case is not supported in BlockSpaceManagerV2, please "
"file an issue with detailed information.")

@property
def is_multi_step(self) -> bool:
return self.num_scheduler_steps > 1
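Stripped of the surrounding SchedulerConfig machinery, the new check reduces to an opt-in guard on an environment variable. A standalone sketch follows; verify_block_manager_choice is an invented name for illustration, not a function in vLLM:

# Standalone sketch of the deprecation gate added to SchedulerConfig.
import os


def verify_block_manager_choice(use_v2_block_manager: bool) -> None:
    allow_v1 = os.environ.get(
        "VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1", "0") == "1"
    if not use_v2_block_manager and not allow_v1:
        raise ValueError(
            "BlockSpaceManagerV1 is deprecated; pass --use-v2-block-manager "
            "or set VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1=1 to override.")


verify_block_manager_choice(use_v2_block_manager=True)    # passes
# verify_block_manager_choice(use_v2_block_manager=False)  # raises ValueError
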
6 changes: 6 additions & 0 deletions vllm/envs.py
@@ -64,6 +64,7 @@
VLLM_USE_TRITON_AWQ: bool = False
VLLM_ALLOW_RUNTIME_LORA_UPDATING: bool = False
VLLM_SKIP_P2P_CHECK: bool = False
VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1: bool = False


def get_default_cache_root():
@@ -434,6 +435,11 @@ def get_default_config_root():
# and trust the driver's peer-to-peer capability report.
"VLLM_SKIP_P2P_CHECK":
lambda: os.getenv("VLLM_SKIP_P2P_CHECK", "0") == "1",

# If set, allows the use of the deprecated block manager V1
"VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1":
lambda: os.environ.get("VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1", "0"
) == "1",
}

# end-env-vars-definition
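The new entry follows the existing vllm/envs.py convention: each variable maps to a lambda that parses os.environ lazily, so envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 is evaluated as a bool at access time. A reduced sketch of that lookup pattern; the module-level __getattr__ here is a simplification of what the real module does:

# Reduced sketch of the envs.py lookup pattern (simplified).
import os

environment_variables = {
    # If set to "1", allows the deprecated block manager V1 to be used.
    "VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1":
    lambda: os.environ.get("VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1", "0") == "1",
}


def __getattr__(name: str):
    # Module-level __getattr__ (PEP 562) resolves attribute access such as
    # envs.VLLM_ALLOW_DEPRECATED_BLOCK_MANAGER_V1 at read time.
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module has no attribute {name!r}")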