From d8b059924f6d420f043d1927612a7702d3c51d8e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 15 Jul 2025 11:53:55 +0100 Subject: [PATCH 1/4] Add full serve CLI reference back to docs Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/cli/README.md | 8 +++++++ docs/mkdocs/hooks/generate_argparse.py | 23 ++++++++++++++++--- requirements/docs.txt | 1 + vllm/entrypoints/cli/serve.py | 31 -------------------------- vllm/entrypoints/openai/cli_args.py | 28 +++++++++++++++++++++++ 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/docs/cli/README.md b/docs/cli/README.md index 1d951747a7ac..fe43de50d534 100644 --- a/docs/cli/README.md +++ b/docs/cli/README.md @@ -1,3 +1,7 @@ +--- +toc_depth: 3 +--- + # vLLM CLI Guide The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with: @@ -42,6 +46,10 @@ Start the vLLM OpenAI Compatible API server. vllm serve --help=page ``` +### help + +--8<-- "docs/argparse/serve.md" + ## chat Generate chat completions via the running API server. diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py index 64120f2d1513..22cf41e6041d 100644 --- a/docs/mkdocs/hooks/generate_argparse.py +++ b/docs/mkdocs/hooks/generate_argparse.py @@ -16,6 +16,7 @@ sys.modules["vllm._C"] = MagicMock() from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402 +from vllm.entrypoints.openai.cli_args import make_arg_parser # noqa: E402 from vllm.utils import FlexibleArgumentParser # noqa: E402 logger = logging.getLogger("mkdocs") @@ -24,15 +25,18 @@ class MarkdownFormatter(HelpFormatter): """Custom formatter that generates markdown for argument groups.""" - def __init__(self, prog): + def __init__(self, prog, starting_heading_level=3): super().__init__(prog, max_help_position=float('inf'), width=float('inf')) + self._section_heading_prefix = "#" * starting_heading_level + self._argument_heading_prefix = "#" * (starting_heading_level + 1) self._markdown_output = [] def start_section(self, heading): if heading not in {"positional arguments", "options"}: - self._markdown_output.append(f"\n### {heading}\n\n") + heading_md = f"\n{self._section_heading_prefix} {heading}\n\n" + self._markdown_output.append(heading_md) def end_section(self): pass @@ -46,9 +50,13 @@ def add_usage(self, usage, actions, groups, prefix=None): def add_arguments(self, actions): for action in actions: + if (len(action.option_strings) == 0 + or "--help" in action.option_strings): + continue option_strings = f'`{"`, `".join(action.option_strings)}`' - self._markdown_output.append(f"#### {option_strings}\n\n") + heading_md = f"{self._argument_heading_prefix} {option_strings}\n\n" + self._markdown_output.append(heading_md) if choices := action.choices: choices = f'`{"`, `".join(str(c) for c in choices)}`' @@ -81,6 +89,14 @@ def create_parser(cls, **kwargs) -> FlexibleArgumentParser: return cls.add_cli_args(parser, **kwargs) +def create_serve_parser() -> FlexibleArgumentParser: + """Create a parser for the serve command with markdown formatting.""" + parser = FlexibleArgumentParser() + parser.formatter_class = lambda prog: MarkdownFormatter( + prog, starting_heading_level=4) + return make_arg_parser(parser) + + def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): logger.info("Generating argparse documentation") logger.debug("Root directory: %s", ROOT_DIR.resolve()) @@ -95,6 +111,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): "engine_args": create_parser(EngineArgs), "async_engine_args": create_parser(AsyncEngineArgs, async_args_only=True), + "serve": create_serve_parser(), } # Generate documentation for each parser diff --git a/requirements/docs.txt b/requirements/docs.txt index 7ea768b99093..1ddc825a9cdd 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -17,6 +17,7 @@ cloudpickle fastapi msgspec openai +partial-json-parser pillow psutil pybase64 diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index d25105cbb789..1204ccc1c679 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -67,37 +67,6 @@ def subparser_init( help="Start the vLLM OpenAI Compatible API server.", description="Start the vLLM OpenAI Compatible API server.", usage="vllm serve [model_tag] [options]") - serve_parser.add_argument("model_tag", - type=str, - nargs='?', - help="The model tag to serve " - "(optional if specified in config)") - serve_parser.add_argument( - "--headless", - action='store_true', - default=False, - help="Run in headless mode. See multi-node data parallel " - "documentation for more details.") - serve_parser.add_argument( - '--data-parallel-start-rank', - '-dpr', - type=int, - default=0, - help="Starting data parallel rank for secondary nodes. " - "Requires --headless.") - serve_parser.add_argument('--api-server-count', - '-asc', - type=int, - default=1, - help='How many API server processes to run.') - serve_parser.add_argument( - "--config", - type=str, - default='', - required=False, - help="Read CLI options from a config file. " - "Must be a YAML with the following options: " - "https://docs.vllm.ai/en/latest/configuration/serve_args.html") serve_parser = make_arg_parser(serve_parser) show_filtered_argument_or_group_from_help(serve_parser, ["serve"]) diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py index 9a7f04cd9b26..6688bf2656f6 100644 --- a/vllm/entrypoints/openai/cli_args.py +++ b/vllm/entrypoints/openai/cli_args.py @@ -248,6 +248,34 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: register all arguments instead of manually enumerating them here. This avoids code duplication and keeps the argument definitions in one place. """ + parser.add_argument("model_tag", + type=str, + nargs="?", + help="The model tag to serve " + "(optional if specified in config)") + parser.add_argument( + "--headless", + action="store_true", + default=False, + help="Run in headless mode. See multi-node data parallel " + "documentation for more details.") + parser.add_argument( + "--data-parallel-start-rank", + "-dpr", + type=int, + default=0, + help="Starting data parallel rank for secondary nodes. " + "Requires --headless.") + parser.add_argument("--api-server-count", + "-asc", + type=int, + default=1, + help="How many API server processes to run.") + parser.add_argument( + "--config", + help="Read CLI options from a config file. " + "Must be a YAML with the following options: " + "https://docs.vllm.ai/en/latest/configuration/serve_args.html") parser = FrontendArgs.add_cli_args(parser) parser = AsyncEngineArgs.add_cli_args(parser) From 62c43d4df262a8b32c229255d75fa08a70c1d066 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 15 Jul 2025 15:25:40 +0200 Subject: [PATCH 2/4] Show argument groups in TOC Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/cli/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cli/README.md b/docs/cli/README.md index fe43de50d534..1f86a26bddc7 100644 --- a/docs/cli/README.md +++ b/docs/cli/README.md @@ -1,5 +1,5 @@ --- -toc_depth: 3 +toc_depth: 4 --- # vLLM CLI Guide From ce82cb5d0b2d5ac82794d9a507c7a7e3f3f0141c Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 15 Jul 2025 15:25:55 +0200 Subject: [PATCH 3/4] Rename `help` -> `Options` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/cli/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cli/README.md b/docs/cli/README.md index 1f86a26bddc7..dfb6051a8c8a 100644 --- a/docs/cli/README.md +++ b/docs/cli/README.md @@ -46,7 +46,7 @@ Start the vLLM OpenAI Compatible API server. vllm serve --help=page ``` -### help +### Options --8<-- "docs/argparse/serve.md" From 0fa773b58fc0df121239c6e904aca977f2406cb6 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 15 Jul 2025 15:26:07 +0200 Subject: [PATCH 4/4] Link to CLI reference from `serve_args.md` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/configuration/serve_args.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration/serve_args.md b/docs/configuration/serve_args.md index 142d4b8af898..c1cc5577bc7a 100644 --- a/docs/configuration/serve_args.md +++ b/docs/configuration/serve_args.md @@ -5,7 +5,7 @@ The `vllm serve` command is used to launch the OpenAI-compatible server. ## CLI Arguments The `vllm serve` command is used to launch the OpenAI-compatible server. -To see the available CLI arguments, run `vllm serve --help`! +To see the available options, take a look at the [CLI Reference](../cli/README.md#options)! ## Configuration file