18 changes: 1 addition & 17 deletions pyproject.toml
@@ -115,6 +115,7 @@ include = ["vllm*"]
"vllm/distributed/parallel_state.py" = ["SIM108"]
"vllm/entrypoints/chat_utils.py" = ["SIM108"]
"vllm/entrypoints/llm.py" = ["SIM108"]
"vllm/executor/ray_distributed_executor.py" = ["SIM108", "SIM112"]
"vllm/model_executor/layers/batch_invariant.py" = ["SIM108"]
"vllm/model_executor/layers/fla/ops/chunk_o.py" = ["SIM108"]
"vllm/model_executor/layers/fused_moe/fused_moe.py" = ["SIM108"]
@@ -134,23 +135,6 @@ include = ["vllm*"]
"tools/profiler/print_layerwise_table.py" = ["SIM118"]
## Loop variable binding issues
"tests/kernels/mamba/test_mamba_ssm_ssd.py" = ["B023"]
-## Type annotation modernization and other rules
-"vllm/attention/backends/abstract.py" = ["UP035", "UP006"]
-"vllm/attention/layer.py" = ["UP035", "UP006"]
-"vllm/attention/layers/chunked_local_attention.py" = ["UP035", "UP006"]
-"vllm/attention/ops/flashmla.py" = ["UP035", "UP006"]
-"vllm/attention/ops/paged_attn.py" = ["UP035", "UP006"]
-"vllm/engine/arg_utils.py" = ["UP035", "UP006"]
-"vllm/engine/metrics.py" = ["UP035", "UP006"]
-"vllm/engine/metrics_types.py" = ["UP035", "UP006"]
-"vllm/executor/executor_base.py" = ["UP035", "UP006"]
-"vllm/executor/msgspec_utils.py" = ["UP035", "UP006"]
-"vllm/executor/ray_distributed_executor.py" = ["UP035", "UP006", "SIM108", "SIM112"]
-"vllm/executor/ray_utils.py" = ["UP035", "UP006"]
-"vllm/executor/uniproc_executor.py" = ["UP035", "UP006"]
-"vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py" = ["UP035"]
-## Type comparison issues
-"vllm/multimodal/inputs.py" = ["E721"]
# End of temporary ignores

[tool.ruff.lint]
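For context on the rules involved: ruff's UP035 flags imports of deprecated `typing` aliases (`List`, `Tuple`, `Type`, ...) and UP006 flags those aliases used in annotations, so deleting the per-file ignores above turns PEP 585 builtin generics back on for the listed modules, which is what the file diffs below carry out. The surviving `ray_distributed_executor.py` entry keeps only SIM108 (prefer a ternary over an if/else assignment) and SIM112 (environment variable not in uppercase). A minimal before/after sketch on a hypothetical module, not code from this PR:

```python
# Before: UP035 flags the deprecated imports, UP006 flags their use in annotations.
from typing import List, Optional, Tuple


def supported_head_sizes() -> List[int]:
    return [64, 96, 128]


# After `ruff check --fix`: PEP 585 builtin generics. Optional is untouched,
# since rewriting it to `X | None` (PEP 604) falls under a separate rule.
def cache_shape(num_blocks: int, pad: Optional[int] = None) -> tuple[int, ...]:
    return (2, num_blocks) if pad is None else (2, num_blocks, pad)
```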
4 changes: 2 additions & 2 deletions tests/compile/test_full_graph.py
@@ -5,7 +5,7 @@

import logging
import tempfile
-from typing import Any, Optional, Union
+from typing import Any, Union

import pytest
import torch
@@ -21,7 +21,7 @@
from ..utils import create_new_process_for_each_test


-def models_list(*, all: bool = True, keywords: Optional[list[str]] = None):
+def models_list(*, all: bool = True, keywords: list[str] | None = None):
TEST_MODELS: list[tuple[str, dict[str, Any]]] = [
("facebook/opt-125m", {}),
(
6 changes: 3 additions & 3 deletions tests/entrypoints/openai/test_serving_chat.py
@@ -6,7 +6,7 @@
import asyncio
from contextlib import suppress
from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock, MagicMock

import pytest
@@ -233,9 +233,9 @@ class MockModelConfig:
multimodal_config = MultiModalConfig()
hf_config = MockHFConfig()
logits_processor_pattern = None
-diff_sampling_param: Optional[dict] = None
+diff_sampling_param: dict | None = None
allowed_local_media_path: str = ""
-allowed_media_domains: Optional[list[str]] = None
+allowed_media_domains: list[str] | None = None
encoder_config = None
generation_config: str = "auto"
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
(file name missing from capture)
@@ -9,7 +9,7 @@
import tempfile
import urllib.request
from collections.abc import Sequence
-from typing import Any, Optional, Union
+from typing import Any, Union

import albumentations
import numpy as np
@@ -98,9 +98,9 @@ def _convert_np_uint8(float_image: torch.Tensor):


def read_geotiff(
-file_path: Optional[str] = None,
-path_type: Optional[str] = None,
-file_data: Optional[bytes] = None,
+file_path: str | None = None,
+path_type: str | None = None,
+file_data: bytes | None = None,
) -> tuple[torch.Tensor, dict, tuple[float, float] | None]:
"""Read all bands from *file_path* and return image + meta info.

@@ -114,8 +114,8 @@

if all([x is None for x in [file_path, path_type, file_data]]):
raise Exception("All input fields to read_geotiff are None")
-write_to_file: Optional[bytes] = None
-path: Optional[str] = None
+write_to_file: bytes | None = None
+path: str | None = None
if file_data is not None:
# with tempfile.NamedTemporaryFile() as tmpfile:
# tmpfile.write(file_data)
@@ -162,9 +162,9 @@ def read_geotiff(
def load_image(
data: Union[list[str]],
path_type: str,
-mean: Optional[list[float]] = None,
-std: Optional[list[float]] = None,
-indices: Optional[Union[list[int], None]] = None,
+mean: list[float] | None = None,
+std: list[float] | None = None,
+indices: Union[list[int], None] | None = None,
):
"""Build an input example by loading images in *file_paths*.

@@ -278,7 +278,7 @@ def output_to_response(
def pre_process(
self,
prompt: IOProcessorInput,
-request_id: Optional[str] = None,
+request_id: str | None = None,
**kwargs,
) -> Union[PromptType, Sequence[PromptType]]:
image_data = dict(prompt)
@@ -359,7 +359,7 @@ def pre_process(
def post_process(
self,
model_output: Sequence[PoolingRequestOutput],
-request_id: Optional[str] = None,
+request_id: str | None = None,
**kwargs,
) -> IOProcessorOutput:
pred_imgs_list = []
4 changes: 2 additions & 2 deletions tests/v1/engine/test_llm_engine.py
@@ -3,7 +3,7 @@
from __future__ import annotations

import random
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING

import pytest

@@ -78,7 +78,7 @@ def vllm_model_skip_tokenizer_init(vllm_runner, request, monkeypatch):

def _get_test_sampling_params(
prompt_list: list[str],
-seed: Optional[int] = 42,
+seed: int | None = 42,
structured_outputs: bool = False,
) -> tuple[list[SamplingParams], list[int]]:
"""Generate random sampling params for a batch."""
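The test diffs above also replace `typing.Optional[X]` with the PEP 604 spelling `X | None`. Evaluated at runtime, `|` in annotations requires Python 3.10+; with `from __future__ import annotations` (visible in the test_llm_engine.py hunk above) annotations stay unevaluated strings, so the spelling is safe on older interpreters too. A small sketch of the pattern, hypothetical code rather than this PR's:

```python
from __future__ import annotations  # lets `int | None` appear in annotations pre-3.10


def pick_seed(seed: int | None = 42) -> list[int]:
    # `int | None` is equivalent to Optional[int]; only the spelling changes.
    return [] if seed is None else [seed]
```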
12 changes: 6 additions & 6 deletions vllm/attention/backends/abstract.py
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from abc import ABC, abstractmethod
-from typing import Generic, List, Optional, Protocol, Tuple, Type, TypeVar
+from typing import Generic, Optional, Protocol, TypeVar

import torch

@@ -48,12 +48,12 @@ def get_name() -> str:

@staticmethod
@abstractmethod
-def get_impl_cls() -> Type["AttentionImpl"]:
+def get_impl_cls() -> type["AttentionImpl"]:
raise NotImplementedError

@staticmethod
@abstractmethod
-def get_metadata_cls() -> Type["AttentionMetadata"]:
+def get_metadata_cls() -> type["AttentionMetadata"]:
raise NotImplementedError

@classmethod
@@ -73,11 +73,11 @@ def get_kv_cache_shape(
num_kv_heads: int,
head_size: int,
cache_dtype_str: str = "auto",
-) -> Tuple[int, ...]:
+) -> tuple[int, ...]:
raise NotImplementedError

@staticmethod
-def get_kv_cache_stride_order() -> Tuple[int, ...]:
+def get_kv_cache_stride_order() -> tuple[int, ...]:
raise NotImplementedError

@classmethod
@@ -147,7 +147,7 @@ def __init__(
head_size: int,
scale: float,
num_kv_heads: Optional[int] = None,
-alibi_slopes: Optional[List[float]] = None,
+alibi_slopes: Optional[list[float]] = None,
sliding_window: Optional[int] = None,
kv_cache_dtype: str = "auto",
logits_soft_cap: Optional[float] = None,
6 changes: 3 additions & 3 deletions vllm/attention/layer.py
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Attention layer."""

-from typing import Callable, List, Optional
+from typing import Callable, Optional

import torch
import torch.nn as nn
@@ -126,7 +126,7 @@ def __init__(
head_size: int,
scale: float,
num_kv_heads: Optional[int] = None,
-alibi_slopes: Optional[List[float]] = None,
+alibi_slopes: Optional[list[float]] = None,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
logits_soft_cap: Optional[float] = None,
@@ -586,7 +586,7 @@ def wait_for_kv_layer_from_connector(layer_name: str):

def maybe_save_kv_layer_to_connector(
layer_name: str,
-kv_cache_layer: List[torch.Tensor],
+kv_cache_layer: list[torch.Tensor],
):
if not has_kv_transfer_group() or not is_v1_kv_transfer_group():
return
4 changes: 2 additions & 2 deletions vllm/attention/layers/chunked_local_attention.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import functools
-from typing import ClassVar, List, Optional
+from typing import ClassVar, Optional

import torch

@@ -61,7 +61,7 @@ def __init__(
scale: float,
attention_chunk_size: int,
num_kv_heads: Optional[int] = None,
-alibi_slopes: Optional[List[float]] = None,
+alibi_slopes: Optional[list[float]] = None,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
kv_sharing_target_layer_name: Optional[str] = None,
10 changes: 5 additions & 5 deletions vllm/attention/ops/flashmla.py
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# adapted from: https://github.com/deepseek-ai/FlashMLA/blob/main/flash_mla/flash_mla_interface.py
-from typing import Optional, Tuple
+from typing import Optional

import torch

@@ -31,7 +31,7 @@
_flashmla_extension_C_AVAILABLE = False


-def is_flashmla_supported() -> Tuple[bool, Optional[str]]:
+def is_flashmla_supported() -> tuple[bool, Optional[str]]:
"""
Return: is_supported_flag, unsupported_reason (optional).
"""
@@ -57,7 +57,7 @@ def get_mla_metadata(
num_heads_q: Optional[int] = None,
is_fp8_kvcache: bool = False,
topk: Optional[int] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
"""
Arguments:
- cache_seqlens: (batch_size), dtype torch.int32.
@@ -101,7 +101,7 @@ def flash_mla_with_kvcache(
descale_k: Optional[torch.Tensor] = None,
is_fp8_kvcache: bool = False,
indices: Optional[torch.Tensor] = None,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
"""
Arguments:
- q: (batch_size, seq_len_q, num_heads_q, head_dim).
@@ -183,7 +183,7 @@ def flash_mla_sparse_prefill(
indices: torch.Tensor,
sm_scale: float,
d_v: int = 512,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
Sparse attention prefill kernel

10 changes: 5 additions & 5 deletions vllm/attention/ops/paged_attn.py
@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from dataclasses import dataclass
-from typing import List, Optional, Tuple
+from typing import Optional

import torch

@@ -41,7 +41,7 @@ class PagedAttentionMetadata:

class PagedAttention:
@staticmethod
-def get_supported_head_sizes() -> List[int]:
+def get_supported_head_sizes() -> list[int]:
return [32, 64, 80, 96, 112, 120, 128, 192, 256]

@staticmethod
@@ -51,15 +51,15 @@ def get_kv_cache_shape(
num_kv_heads: int,
head_size: int,
cache_dtype_str: str = "auto",
-) -> Tuple[int, ...]:
+) -> tuple[int, ...]:
return (2, num_blocks, block_size * num_kv_heads * head_size)

@staticmethod
def split_kv_cache(
kv_cache: torch.Tensor,
num_kv_heads: int,
head_size: int,
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
x = 16 // kv_cache.element_size()
num_blocks = kv_cache.shape[1]

@@ -255,7 +255,7 @@ def swap_blocks(

@staticmethod
def copy_blocks(
-kv_caches: List[torch.Tensor],
+kv_caches: list[torch.Tensor],
src_to_dists: torch.Tensor,
) -> None:
key_caches = [kv_cache[0] for kv_cache in kv_caches]
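One spelling that recurs throughout the attention diffs is `tuple[int, ...]`: the builtin generic with a literal ellipsis, typing a homogeneous tuple of arbitrary length. A quick illustration, hypothetical rather than from the PR:

```python
def kv_cache_shape(num_blocks: int, block_size: int) -> tuple[int, ...]:
    # `tuple[int, ...]` allows any arity; `tuple[int, int, int]` would pin it to three.
    return (2, num_blocks, block_size)
```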