Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/cli/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
---
toc_depth: 3
---

# vLLM CLI Guide

The vllm command-line tool is used to run and manage vLLM models. You can start by viewing the help message with:
Expand Down Expand Up @@ -42,6 +46,10 @@ Start the vLLM OpenAI Compatible API server.
vllm serve --help=page
```

### help
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The heading ### help is a bit generic and could be more descriptive for users navigating the documentation. A more specific heading would improve clarity.

Suggested change
### help
### `vllm serve` Arguments

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would call this section Options


--8<-- "docs/argparse/serve.md"

## chat

Generate chat completions via the running API server.
Expand Down
23 changes: 20 additions & 3 deletions docs/mkdocs/hooks/generate_argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
sys.modules["vllm._C"] = MagicMock()

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402
from vllm.entrypoints.openai.cli_args import make_arg_parser # noqa: E402
from vllm.utils import FlexibleArgumentParser # noqa: E402

logger = logging.getLogger("mkdocs")
Expand All @@ -24,15 +25,18 @@
class MarkdownFormatter(HelpFormatter):
"""Custom formatter that generates markdown for argument groups."""

def __init__(self, prog):
def __init__(self, prog, starting_heading_level=3):
super().__init__(prog,
max_help_position=float('inf'),
width=float('inf'))
self._section_heading_prefix = "#" * starting_heading_level
self._argument_heading_prefix = "#" * (starting_heading_level + 1)
self._markdown_output = []

def start_section(self, heading):
if heading not in {"positional arguments", "options"}:
self._markdown_output.append(f"\n### {heading}\n\n")
heading_md = f"\n{self._section_heading_prefix} {heading}\n\n"
self._markdown_output.append(heading_md)

def end_section(self):
pass
Expand All @@ -46,9 +50,13 @@ def add_usage(self, usage, actions, groups, prefix=None):

def add_arguments(self, actions):
for action in actions:
if (len(action.option_strings) == 0
or "--help" in action.option_strings):
continue

option_strings = f'`{"`, `".join(action.option_strings)}`'
self._markdown_output.append(f"#### {option_strings}\n\n")
heading_md = f"{self._argument_heading_prefix} {option_strings}\n\n"
self._markdown_output.append(heading_md)

if choices := action.choices:
choices = f'`{"`, `".join(str(c) for c in choices)}`'
Expand Down Expand Up @@ -81,6 +89,14 @@ def create_parser(cls, **kwargs) -> FlexibleArgumentParser:
return cls.add_cli_args(parser, **kwargs)


def create_serve_parser() -> FlexibleArgumentParser:
"""Create a parser for the serve command with markdown formatting."""
parser = FlexibleArgumentParser()
parser.formatter_class = lambda prog: MarkdownFormatter(
prog, starting_heading_level=4)
return make_arg_parser(parser)


def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
logger.info("Generating argparse documentation")
logger.debug("Root directory: %s", ROOT_DIR.resolve())
Expand All @@ -95,6 +111,7 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
"engine_args": create_parser(EngineArgs),
"async_engine_args": create_parser(AsyncEngineArgs,
async_args_only=True),
"serve": create_serve_parser(),
}

# Generate documentation for each parser
Expand Down
1 change: 1 addition & 0 deletions requirements/docs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ cloudpickle
fastapi
msgspec
openai
partial-json-parser
pillow
psutil
pybase64
Expand Down
31 changes: 0 additions & 31 deletions vllm/entrypoints/cli/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,37 +67,6 @@ def subparser_init(
help="Start the vLLM OpenAI Compatible API server.",
description="Start the vLLM OpenAI Compatible API server.",
usage="vllm serve [model_tag] [options]")
serve_parser.add_argument("model_tag",
type=str,
nargs='?',
help="The model tag to serve "
"(optional if specified in config)")
serve_parser.add_argument(
"--headless",
action='store_true',
default=False,
help="Run in headless mode. See multi-node data parallel "
"documentation for more details.")
serve_parser.add_argument(
'--data-parallel-start-rank',
'-dpr',
type=int,
default=0,
help="Starting data parallel rank for secondary nodes. "
"Requires --headless.")
serve_parser.add_argument('--api-server-count',
'-asc',
type=int,
default=1,
help='How many API server processes to run.')
serve_parser.add_argument(
"--config",
type=str,
default='',
required=False,
help="Read CLI options from a config file. "
"Must be a YAML with the following options: "
"https://docs.vllm.ai/en/latest/configuration/serve_args.html")

serve_parser = make_arg_parser(serve_parser)
show_filtered_argument_or_group_from_help(serve_parser, ["serve"])
Expand Down
28 changes: 28 additions & 0 deletions vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,34 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
register all arguments instead of manually enumerating them here. This
avoids code duplication and keeps the argument definitions in one place.
"""
parser.add_argument("model_tag",
type=str,
nargs="?",
help="The model tag to serve "
"(optional if specified in config)")
parser.add_argument(
"--headless",
action="store_true",
default=False,
help="Run in headless mode. See multi-node data parallel "
"documentation for more details.")
parser.add_argument(
"--data-parallel-start-rank",
"-dpr",
type=int,
default=0,
help="Starting data parallel rank for secondary nodes. "
"Requires --headless.")
parser.add_argument("--api-server-count",
"-asc",
type=int,
default=1,
help="How many API server processes to run.")
Comment on lines +256 to +273
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At high level, these belong to FrontEndArgs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about adding them, but I wasn't sure about the data parallel ones though, because they are sort of "pre-frontend"

parser.add_argument(
"--config",
help="Read CLI options from a config file. "
"Must be a YAML with the following options: "
"https://docs.vllm.ai/en/latest/configuration/serve_args.html")
parser = FrontendArgs.add_cli_args(parser)
parser = AsyncEngineArgs.add_cli_args(parser)

Expand Down