benchmarks/kernels/benchmark_activation.py (5 changes: 2 additions & 3 deletions)

@@ -8,10 +8,9 @@
 
 import vllm.model_executor.layers.activation  # noqa F401
 from vllm.model_executor.custom_op import CustomOp
-from vllm.platforms import current_platform
 from vllm.triton_utils import triton
 from vllm.utils.argparse_utils import FlexibleArgumentParser
-from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
 
 batch_size_range = [1, 16, 128]
 seq_len_range = [1, 16, 64, 1024, 4096]
@@ -30,7 +29,7 @@ def benchmark_activation(
     device = "cuda"
     num_tokens = batch_size * seq_len
     dim = intermediate_size
-    current_platform.seed_everything(42)
+    set_random_seed(42)
     torch.set_default_device(device)
 
     if func_name == "gelu_and_mul":
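Note: every benchmark diff in this PR applies the same substitution. A minimal before/after sketch of the pattern, assuming `set_random_seed` accepts the same optional integer seed as the deprecated platform hook:

```python
import torch

from vllm.utils.torch_utils import set_random_seed

# Before (deprecated platform hook):
#   from vllm.platforms import current_platform
#   current_platform.seed_everything(42)

# After (plain torch utility, no platform object needed):
set_random_seed(42)
torch.set_default_device("cuda")
x = torch.randn(16, 1024)  # reproducible across runs given the same seed
```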
benchmarks/kernels/benchmark_layernorm.py (5 changes: 2 additions & 3 deletions)

@@ -6,9 +6,8 @@
 import torch
 
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
-from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
 
 
 @torch.inference_mode()
@@ -22,7 +21,7 @@ def main(
     num_warmup_iters: int = 5,
     num_iters: int = 100,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
     torch.set_default_device("cuda")
 
     layer = RMSNorm(hidden_size).to(dtype=dtype)
benchmarks/kernels/benchmark_moe.py (5 changes: 3 additions & 2 deletions)

@@ -24,6 +24,7 @@
 from vllm.transformers_utils.config import get_config
 from vllm.triton_utils import triton
 from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import set_random_seed
 
 FP8_DTYPE = current_platform.fp8_dtype()
 
@@ -431,7 +432,7 @@ def merge_unique_dicts(list1, list2):
 class BenchmarkWorker:
     def __init__(self, seed: int) -> None:
         torch.set_default_device("cuda")
-        current_platform.seed_everything(seed)
+        set_random_seed(seed)
         self.seed = seed
         # Get the device ID to allocate tensors and kernels
         # on the respective GPU. This is required for Ray to work
@@ -451,7 +452,7 @@ def benchmark(
         block_quant_shape: list[int] = None,
         use_deep_gemm: bool = False,
     ) -> tuple[dict[str, int], float]:
-        current_platform.seed_everything(self.seed)
+        set_random_seed(self.seed)
         dtype_str = _get_config_dtype_str(
             dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8
         )
benchmarks/kernels/benchmark_moe_permute_unpermute.py (5 changes: 3 additions & 2 deletions)

@@ -18,6 +18,7 @@
 from vllm.model_executor.layers.fused_moe.utils import _fp8_quantize
 from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import set_random_seed
 
 FP8_DTYPE = current_platform.fp8_dtype()
 
@@ -261,7 +262,7 @@ def run(input: tuple):
 class BenchmarkWorker:
     def __init__(self, seed: int) -> None:
         torch.set_default_device("cuda")
-        current_platform.seed_everything(seed)
+        set_random_seed(seed)
         self.seed = seed
         # Get the device ID to allocate tensors and kernels
         # on the respective GPU. This is required for Ray to work
@@ -279,7 +280,7 @@ def benchmark(
         use_int8_w8a16: bool,
         use_customized_permute: bool = False,
     ) -> tuple[dict[str, int], float]:
-        current_platform.seed_everything(self.seed)
+        set_random_seed(self.seed)
 
         permute_time = benchmark_permute(
             num_tokens,
benchmarks/kernels/benchmark_mrope.py (4 changes: 2 additions & 2 deletions)

@@ -37,9 +37,9 @@
 import torch
 
 from vllm.model_executor.layers.rotary_embedding import get_rope
-from vllm.platforms import current_platform
 from vllm.transformers_utils.config import get_config
 from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.torch_utils import set_random_seed
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -94,7 +94,7 @@ def benchmark_mrope(
     benchmark_iter: int = 100,
     csv_writer=None,
 ):
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
     torch.set_default_device(device)
     # the parameters to compute the q k v size based on tp_size
     mrope_helper_class = get_rope(
benchmarks/kernels/benchmark_paged_attention.py (3 changes: 2 additions & 1 deletion)

@@ -13,6 +13,7 @@
 from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
     create_kv_caches_with_random,
+    set_random_seed,
 )
 
 logger = init_logger(__name__)
@@ -38,7 +39,7 @@ def main(
     device: str = "cuda",
     kv_cache_dtype: str | None = None,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
 
     scale = float(1.0 / (head_size**0.5))
     query = torch.empty(
benchmarks/kernels/benchmark_quant.py (5 changes: 2 additions & 3 deletions)

@@ -6,9 +6,8 @@
 import torch
 
 from vllm import _custom_ops as ops
-from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
-from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
+from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
 
 
 @torch.inference_mode()
@@ -23,7 +22,7 @@ def main(
     num_warmup_iters: int = 5,
     num_iters: int = 100,
 ) -> None:
-    current_platform.seed_everything(seed)
+    set_random_seed(seed)
     torch.set_default_device("cuda")
 
     x = torch.randn(num_tokens, hidden_size, dtype=dtype)
benchmarks/kernels/benchmark_reshape_and_cache.py (4 changes: 2 additions & 2 deletions)

@@ -8,11 +8,11 @@
 
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
     create_kv_caches_with_random,
+    set_random_seed,
 )
 
 logger = init_logger(__name__)
@@ -36,7 +36,7 @@ def run_benchmark(
     if kv_cache_dtype == "fp8" and head_size % 16:
         raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.")
 
-    current_platform.seed_everything(42)
+    set_random_seed(42)
     torch.set_default_device(device)
 
     # create random key / value tensors [T, H, D].
benchmarks/kernels/benchmark_reshape_and_cache_flash.py (4 changes: 2 additions & 2 deletions)

@@ -11,11 +11,11 @@
     triton_reshape_and_cache_flash,
 )
 from vllm.logger import init_logger
-from vllm.platforms import current_platform
 from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.utils.torch_utils import (
     STR_DTYPE_TO_TORCH_DTYPE,
     create_kv_caches_with_random_flash,
+    set_random_seed,
 )
 
 logger = init_logger(__name__)
@@ -49,7 +49,7 @@ def run_benchmark(
     if implementation == "triton" and kv_cache_layout == "HND":
         return float("nan")  # Triton does not support HND layout yet.
 
-    current_platform.seed_everything(42)
+    set_random_seed(42)
     torch.set_default_device(device)
 
     # create random key / value tensors [T, H, D].
benchmarks/kernels/benchmark_silu_mul_fp8_quant.py (4 changes: 2 additions & 2 deletions)

@@ -23,9 +23,9 @@
 from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
     persistent_masked_m_silu_mul_quant,
 )
-from vllm.platforms import current_platform
 from vllm.triton_utils import tl, triton
 from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
+from vllm.utils.torch_utils import set_random_seed
 
 
 @triton.jit
@@ -207,7 +207,7 @@ def benchmark(
 ):
     def generate_data(seed_offset=0):
         """Generate input data with given seed offset"""
-        current_platform.seed_everything(42 + seed_offset)
+        set_random_seed(42 + seed_offset)
         y = torch.rand((E, T, 2 * H), dtype=torch.bfloat16, device="cuda").contiguous()
 
         if gen_strategy == "random_imbalanced":
docs/design/plugin_system.md (2 changes: 1 addition & 1 deletion)

@@ -154,4 +154,4 @@ The interface for the model/module may change during vLLM's development. If you
 !!! warning "Deprecations"
     - `use_v1` parameter in `Platform.get_attn_backend_cls` is deprecated. It has been removed in v0.13.0.
     - `_Backend` in `vllm.attention` is deprecated. It has been removed in v0.13.0. Please use `vllm.attention.backends.registry.register_backend` to add new attention backend to `AttentionBackendEnum` instead.
-    - `seed_everything` platform interface is deprecated. It will be removed in v0.14.0 or later. Please use `vllm.utils.torch_utils.set_random_seed` instead.
+    - `seed_everything` platform interface is deprecated. It will be removed in v0.15.0 or later. Please use `vllm.utils.torch_utils.set_random_seed` instead.
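For downstream code still calling the platform hook, the old spelling keeps working until removal but now warns at call time (see the `vllm/platforms/interface.py` diff below, which swaps the manual `logger.info_once` for `typing_extensions.deprecated`). A migration sketch, assuming `@deprecated` emits a `DeprecationWarning` on call, which is its documented runtime behavior:

```python
import warnings

from vllm.platforms import current_platform
from vllm.utils.torch_utils import set_random_seed

# Old spelling: still functional, but expected to emit DeprecationWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    current_platform.seed_everything(0)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# New spelling: no platform object involved.
set_random_seed(0)
```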
vllm/platforms/interface.py (9 changes: 5 additions & 4 deletions)

@@ -11,6 +11,7 @@
 
 import numpy as np
 import torch
+from typing_extensions import deprecated
 
 from vllm.attention.backends.registry import AttentionBackendEnum
 from vllm.logger import init_logger
@@ -365,17 +366,17 @@ def inference_mode(cls):
         return torch.inference_mode(mode=True)
 
     @classmethod
+    @deprecated(
+        "`seed_everything` is deprecated. It will be removed in v0.15.0 or later. "
+        "Please use `vllm.utils.torch_utils.set_random_seed` instead."
+    )
     def seed_everything(cls, seed: int | None = None) -> None:
         """
         Set the seed of each random module.
         `torch.manual_seed` will set seed on all devices.
 
         Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
         """
-        logger.info_once(
-            "`seed_everything` is deprecated. It will be removed in v0.14.0 or later. "
-            "Please use `vllm.utils.torch_utils.set_random_seed` instead."
-        )
         if seed is not None:
             random.seed(seed)
             np.random.seed(seed)
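For reference, a sketch of what `vllm.utils.torch_utils.set_random_seed` presumably does, inferred from the `seed_everything` body and docstring shown above; the real helper may differ (for example, extra accelerator-specific seeding):

```python
import random

import numpy as np
import torch


# Hypothetical stand-in for vllm.utils.torch_utils.set_random_seed; the name
# and body here are inferred from the diff above, not copied from the module.
def set_random_seed_sketch(seed: int | None = None) -> None:
    if seed is not None:
        random.seed(seed)        # seed Python's stdlib RNG
        np.random.seed(seed)     # seed NumPy's legacy global RNG
        torch.manual_seed(seed)  # torch.manual_seed seeds all devices
```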