@@ -935,6 +935,10 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str):
         ]
 
         if rsv_cores_for_kv:
+            if not hasattr(os, "sched_setaffinity"):
+                raise NotImplementedError(
+                    "os.sched_setaffinity is not available on this platform"
+                )
             os.sched_setaffinity(0, rsv_cores_for_kv)
 
         # support for oot platform which can't register nixl memory
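Note: the added guard matters because os.sched_setaffinity only exists on Linux; on macOS or Windows the call would otherwise fail with a bare AttributeError. A minimal standalone sketch of the same pattern (the helper name below is hypothetical and not part of this PR):

import os

def pin_to_cores(core_ids: set[int]) -> None:
    # Pin the calling process (pid 0) to the given CPU cores, or fail loudly
    # on platforms where the affinity API does not exist.
    if not hasattr(os, "sched_setaffinity"):
        raise NotImplementedError(
            "os.sched_setaffinity is not available on this platform"
        )
    os.sched_setaffinity(0, core_ids)

if __name__ == "__main__":
    pin_to_cores({0, 1})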
vllm/entrypoints/openai/chat_completion/protocol.py (2 changes: 1 addition & 1 deletion)
@@ -5,7 +5,6 @@
 # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
 import json
 import time
-from dataclasses import replace
 from typing import Annotated, Any, ClassVar, Literal
 
 import torch
@@ -16,6 +15,7 @@
 from pydantic import Field, model_validator
 
 from vllm.config import ModelConfig
+from vllm.config.utils import replace
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
     ChatTemplateContentFormatOption,
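The import swap above (repeated in the files below) points replace at vllm.config.utils instead of the stdlib dataclasses module. The helper's implementation is not part of this diff; a purely hypothetical sketch of a drop-in replacement that keeps the stdlib behaviour for plain dataclasses while also covering non-dataclass config objects might look like this (names and fallback behaviour are assumptions, not the actual vLLM code):

import dataclasses
from typing import Any, TypeVar

T = TypeVar("T")

def replace(obj: T, **changes: Any) -> T:
    # Plain dataclass instances keep the stdlib semantics.
    if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
        return dataclasses.replace(obj, **changes)
    # Assumption: anything else is a Pydantic model, copied with field updates.
    return obj.model_copy(update=changes)  # type: ignore[attr-defined]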
vllm/entrypoints/openai/completion/protocol.py (2 changes: 1 addition & 1 deletion)
@@ -5,13 +5,13 @@
 # https://github.com/lm-sys/FastChat/blob/168ccc29d3f7edc50823016105c024fe2282732a/fastchat/protocol/openai_api_protocol.py
 import json
 import time
-from dataclasses import replace
 from typing import Annotated, Any, Literal
 
 import torch
 from pydantic import Field, model_validator
 
 from vllm.config import ModelConfig
+from vllm.config.utils import replace
 from vllm.entrypoints.openai.engine.protocol import (
     AnyResponseFormat,
     LegacyStructuralTagResponseFormat,
vllm/entrypoints/openai/responses/protocol.py (4 changes: 3 additions & 1 deletion)
@@ -337,7 +337,9 @@ def to_sampling_params(
                 and response_format.schema_ is not None
             ):
                 structured_outputs = StructuredOutputsParams(
-                    json=response_format.schema_
+                    json=response_format.schema_  # type: ignore[call-arg]
+                    # --follow-imports skip hides the class definition but also hides
+                    # multiple third party conflicts, so best of both evils
                 )
 
         stop = self.stop if self.stop else []
vllm/entrypoints/openai/responses/serving.py (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,6 @@
 from collections.abc import AsyncGenerator, AsyncIterator, Callable, Sequence
 from contextlib import AsyncExitStack
 from copy import copy
-from dataclasses import replace
 from http import HTTPStatus
 from typing import Final
 
@@ -40,6 +39,7 @@
 from pydantic import TypeAdapter
 
 from vllm import envs
+from vllm.config.utils import replace
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (
     ChatCompletionMessageParam,
vllm/sampling_params.py (4 changes: 2 additions & 2 deletions)
@@ -3,7 +3,7 @@
 """Sampling parameters for text generation."""
 
 import copy
-import json
+import json as json_mod
 from dataclasses import field
 from enum import Enum, IntEnum
 from functools import cached_property
@@ -791,7 +791,7 @@ def _validate_structured_outputs(
         skip_guidance = False
         if so_params.json:
             if isinstance(so_params.json, str):
-                schema = json.loads(so_params.json)
+                schema = json_mod.loads(so_params.json)
             else:
                 schema = so_params.json
             skip_guidance = has_guidance_unsupported_json_features(schema)
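The json -> json_mod alias frees the bare name json inside vllm/sampling_params.py. The motivation is not spelled out in the diff, but a common reason for this kind of rename is a local, parameter, or field that is itself called json shadowing the stdlib module. A standalone illustration of that hazard (hypothetical names, not vLLM code):

import json as json_mod

def load_schema(json: str | dict) -> dict:
    # The parameter is literally named "json"; without the module alias,
    # calling json.loads(...) here would resolve to the string argument,
    # not the stdlib module, and fail with an AttributeError.
    if isinstance(json, str):
        return json_mod.loads(json)
    return json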