Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
9e714c0
[grpc] Add gRPC server
CatherineSue Nov 12, 2025
5b0ae5a
Add grpc in CODEOWNERS
CatherineSue Dec 6, 2025
a274030
Add type stubs for proto files
CatherineSue Dec 6, 2025
64ff6d1
Exclude auto-generated gRPC stubs in mkdocs
CatherineSue Dec 6, 2025
41bc4f6
Run precommit
CatherineSue Dec 6, 2025
d5b2741
Add pyi in pyproject.toml
CatherineSue Dec 6, 2025
8421d59
Add mypy ignores to all generated grpc stubs
CatherineSue Dec 6, 2025
e11e45c
Exclude grpc in api-autonav
CatherineSue Dec 6, 2025
81db261
remove code owner, replace logger.error with logger.exception
CatherineSue Jan 3, 2026
df1898c
move grpc protobuf compilation to setup
CatherineSue Jan 3, 2026
0b306ce
remove disable log request server arg
CatherineSue Jan 3, 2026
22f08a7
Stop strings fix: Changed detokenize=False → detokenize=bool(stop) an…
CatherineSue Jan 3, 2026
c25a5d4
unify logging content with http server startup
CatherineSue Jan 3, 2026
7bf5e78
update input processor api call
CatherineSue Jan 3, 2026
af3ba36
assign request internal id
CatherineSue Jan 3, 2026
d23b707
streamline
njhill Jan 4, 2026
231bc03
remove grpc_request_manager.py for now
njhill Jan 5, 2026
1164f88
remove out-of-band Abort rpc
njhill Jan 5, 2026
da8e877
also support text input
njhill Jan 5, 2026
aba3984
fix doc warnings
njhill Jan 5, 2026
0cdf3ea
param type and validation fixes
njhill Jan 5, 2026
079ffb0
add ci test
njhill Jan 5, 2026
abfc077
Merge branch 'main' into vllm-grpc-upstream
simon-mo Jan 5, 2026
6efd639
add back oob abort rpc
njhill Jan 7, 2026
a6d5c39
Merge remote-tracking branch 'origin/main' into vllm-grpc-upstream
njhill Jan 7, 2026
4165ba8
streamline proto: remove custom err messages and req_id in responses
njhill Jan 7, 2026
be3845e
update grpc version; use uvloop
njhill Jan 7, 2026
40cea77
update protobuf version requirements
njhill Jan 8, 2026
46b8c88
Merge remote-tracking branch 'origin/main' into vllm-grpc-upstream
njhill Jan 8, 2026
370c046
also update test.txt version of protobuf
njhill Jan 8, 2026
e36c687
Merge remote-tracking branch 'origin/main' into vllm-grpc-upstream
njhill Jan 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -227,3 +227,8 @@ ep_kernels_workspace/

# Allow tracked library source folders under submodules (e.g., benchmarks/lib)
!vllm/benchmarks/lib/

# Generated gRPC protobuf files (compiled at build time from vllm_engine.proto)
vllm/grpc/vllm_engine_pb2.py
vllm/grpc/vllm_engine_pb2_grpc.py
vllm/grpc/vllm_engine_pb2.pyi
4 changes: 3 additions & 1 deletion mkdocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,16 @@ plugins:
- "re:vllm\\._.*" # Internal modules
- "vllm.third_party"
- "vllm.vllm_flash_attn"
- "re:vllm\\.grpc\\..*_pb2.*" # Auto-generated protobuf files
- !ENV [API_AUTONAV_EXCLUDE, "re:^$"] # Match nothing by default
- mkdocstrings:
handlers:
python:
options:
show_symbol_type_heading: true
show_symbol_type_toc: true
filters: []
filters:
- "!.*_pb2_grpc" # Exclude auto-generated gRPC stubs
summary:
modules: true
show_if_no_docstring: true
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ requires = [
"torch == 2.9.1",
"wheel",
"jinja2",
"grpcio-tools>=1.76.0",
]
build-backend = "setuptools.build_meta"

Expand Down Expand Up @@ -55,6 +56,10 @@ include = ["vllm*"]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# Exclude generated protobuf files
"vllm/grpc/*_pb2.py" = ["ALL"]
"vllm/grpc/*_pb2_grpc.py" = ["ALL"]
"vllm/grpc/*_pb2.pyi" = ["ALL"]

[tool.ruff.lint]
select = [
Expand Down
2 changes: 2 additions & 0 deletions requirements/build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ wheel
jinja2>=3.1.6
regex
build
protobuf>=6.33.2
grpcio-tools>=1.76.0
4 changes: 3 additions & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ blake3
py-cpuinfo
transformers >= 4.56.0, < 5
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
protobuf # Required by LlamaTokenizer.
protobuf >= 6.30.0 # Required by LlamaTokenizer, gRPC.
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
aiohttp
openai >= 1.99.1 # For Responses API with reasoning content
Expand Down Expand Up @@ -52,3 +52,5 @@ openai-harmony >= 0.0.3 # Required for gpt-oss
anthropic == 0.71.0
model-hosting-container-standards >= 0.1.10, < 1.0.0
mcp
grpcio>=1.76.0
grpcio-reflection>=1.76.0
5 changes: 3 additions & 2 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ graphql-relay==3.2.0
# via graphene
greenlet==3.2.3
# via sqlalchemy
grpcio==1.71.0
grpcio==1.76.0
# via ray
gunicorn==23.0.0
# via mlflow
Expand Down Expand Up @@ -758,7 +758,7 @@ propcache==0.2.0
# yarl
proto-plus==1.26.1
# via google-api-core
protobuf==5.28.3
protobuf==6.33.2
# via
# google-api-core
# googleapis-common-protos
Expand Down Expand Up @@ -1249,6 +1249,7 @@ typing-extensions==4.15.0
# chz
# fastapi
# graphene
# grpcio
# huggingface-hub
# librosa
# lightning
Expand Down
73 changes: 71 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from packaging.version import Version, parse
from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext
from setuptools.command.build_py import build_py
from setuptools_scm import get_version
from torch.utils.cpp_extension import CUDA_HOME, ROCM_HOME

Expand Down Expand Up @@ -79,6 +80,73 @@ def is_freethreaded():
return bool(sysconfig.get_config_var("Py_GIL_DISABLED"))


def compile_grpc_protos():
    """Compile gRPC protobuf definitions during build.

    This generates *_pb2.py, *_pb2_grpc.py, and *_pb2.pyi files from
    the vllm_engine.proto definition, then prepends SPDX license headers
    and a ``# mypy: ignore-errors`` directive to each generated file that
    does not already start with an SPDX header.

    Returns:
        bool: True when protoc ran successfully and the headers were
        applied. False when grpcio-tools cannot be imported, the proto
        file does not exist, or protoc returns a non-zero exit code;
        these conditions are logged rather than raised.
    """
    try:
        from grpc_tools import protoc
    except ImportError:
        # Best-effort: a missing build tool only disables the gRPC server.
        logger.warning(
            "grpcio-tools not installed, skipping gRPC proto compilation. "
            "gRPC server functionality will not be available."
        )
        return False

    proto_file = ROOT_DIR / "vllm" / "grpc" / "vllm_engine.proto"
    if not proto_file.exists():
        logger.warning("Proto file not found at %s, skipping compilation", proto_file)
        return False

    logger.info("Compiling gRPC protobuf: %s", proto_file)

    # The first list element is the program name (argv[0]) expected by
    # protoc.main. The proto path and every output directory are ROOT_DIR;
    # the generated files are then looked up under ROOT_DIR/vllm/grpc below.
    result = protoc.main(
        [
            "grpc_tools.protoc",
            f"--proto_path={ROOT_DIR}",
            f"--python_out={ROOT_DIR}",
            f"--grpc_python_out={ROOT_DIR}",
            f"--pyi_out={ROOT_DIR}",
            str(proto_file),
        ]
    )

    if result != 0:
        logger.error("protoc failed with exit code %s", result)
        return False

    # Add SPDX headers and mypy ignore to generated files
    spdx_header = (
        "# SPDX-License-Identifier: Apache-2.0\n"
        "# SPDX-FileCopyrightText: Copyright contributors to the vLLM project\n"
        "# mypy: ignore-errors\n"
    )

    grpc_dir = ROOT_DIR / "vllm" / "grpc"
    for generated_file in [
        grpc_dir / "vllm_engine_pb2.py",
        grpc_dir / "vllm_engine_pb2_grpc.py",
        grpc_dir / "vllm_engine_pb2.pyi",
    ]:
        if generated_file.exists():
            # Read and write with an explicit UTF-8 encoding so the rewrite
            # does not depend on the build machine's locale encoding.
            content = generated_file.read_text(encoding="utf-8")
            # Skip files that already carry the header so repeated builds
            # do not stack duplicate headers.
            if not content.startswith("# SPDX-License-Identifier"):
                generated_file.write_text(spdx_header + content, encoding="utf-8")

    logger.info("gRPC protobuf compilation successful")
    return True


class BuildPyAndGenerateGrpc(build_py):
    """``build_py`` command that generates the gRPC stubs before building.

    The stub generation step runs first so that any files it produces
    exist by the time the regular Python build step executes.
    """

    def run(self):
        # The helper's boolean result is deliberately not inspected here:
        # the build proceeds whether or not stub generation succeeded.
        compile_grpc_protos()
        super(BuildPyAndGenerateGrpc, self).run()


class CMakeExtension(Extension):
def __init__(self, name: str, cmake_lists_dir: str = ".", **kwa) -> None:
super().__init__(name, sources=[], py_limited_api=not is_freethreaded(), **kwa)
Expand Down Expand Up @@ -882,12 +950,13 @@ def _read_requirements(filename: str) -> list[str]:
ext_modules = []

if not ext_modules:
cmdclass = {}
cmdclass = {"build_py": BuildPyAndGenerateGrpc}
else:
cmdclass = {
"build_ext": precompiled_build_ext
if envs.VLLM_USE_PRECOMPILED
else cmake_build_ext
else cmake_build_ext,
"build_py": BuildPyAndGenerateGrpc,
}

setup(
Expand Down
Loading