diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 245ac7daf7ec..b3f2ae703fdf 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -935,6 +935,10 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): ] if rsv_cores_for_kv: + if not hasattr(os, "sched_setaffinity"): + raise NotImplementedError( + "os.sched_setaffinity is not available on this platform" + ) os.sched_setaffinity(0, rsv_cores_for_kv) # support for oot platform which can't register nixl memory diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 14feb49767b3..9763f2e5cd06 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -5,7 +5,6 @@ # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py import json import time -from dataclasses import replace from typing import Annotated, Any, ClassVar, Literal import torch @@ -16,6 +15,7 @@ from pydantic import Field, model_validator from vllm.config import ModelConfig +from vllm.config.utils import replace from vllm.entrypoints.chat_utils import ( ChatCompletionMessageParam, ChatTemplateContentFormatOption, diff --git a/vllm/entrypoints/openai/completion/protocol.py b/vllm/entrypoints/openai/completion/protocol.py index 904c9eca4e93..aec1a0a95de6 100644 --- a/vllm/entrypoints/openai/completion/protocol.py +++ b/vllm/entrypoints/openai/completion/protocol.py @@ -5,13 +5,13 @@ # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py import json import time -from dataclasses import replace from typing import Annotated, Any, Literal import torch from pydantic import Field, model_validator from 
vllm.config import ModelConfig +from vllm.config.utils import replace from vllm.entrypoints.openai.engine.protocol import ( AnyResponseFormat, LegacyStructuralTagResponseFormat, diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py index 2b62d7dca76c..b0ffd0314792 100644 --- a/vllm/entrypoints/openai/responses/protocol.py +++ b/vllm/entrypoints/openai/responses/protocol.py @@ -337,7 +337,9 @@ def to_sampling_params( and response_format.schema_ is not None ): structured_outputs = StructuredOutputsParams( - json=response_format.schema_ + json=response_format.schema_ # type: ignore[call-arg] + # --follow-imports skip hides the class definition but also hides + # multiple third-party conflicts, so this is the lesser of two evils ) stop = self.stop if self.stop else [] diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py index 4055095fd2f3..3077df59fda0 100644 --- a/vllm/entrypoints/openai/responses/serving.py +++ b/vllm/entrypoints/openai/responses/serving.py @@ -8,7 +8,6 @@ from collections.abc import AsyncGenerator, AsyncIterator, Callable, Sequence from contextlib import AsyncExitStack from copy import copy -from dataclasses import replace from http import HTTPStatus from typing import Final @@ -40,6 +39,7 @@ from pydantic import TypeAdapter from vllm import envs +from vllm.config.utils import replace from vllm.engine.protocol import EngineClient from vllm.entrypoints.chat_utils import ( ChatCompletionMessageParam, diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 520481c58e7d..cf4922b238a0 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -3,7 +3,7 @@ """Sampling parameters for text generation.""" import copy -import json +import json as json_mod from dataclasses import field from enum import Enum, IntEnum from functools import cached_property @@ -791,7 +791,7 @@ def _validate_structured_outputs( skip_guidance = False if so_params.json:
if isinstance(so_params.json, str): - schema = json.loads(so_params.json) + schema = json_mod.loads(so_params.json) else: schema = so_params.json skip_guidance = has_guidance_unsupported_json_features(schema)