diff --git a/docs/guides/server.md b/docs/guides/server.md index 12badf87f..65d1c2da5 100644 --- a/docs/guides/server.md +++ b/docs/guides/server.md @@ -2,6 +2,8 @@ vllm-mlx provides a FastAPI server with full OpenAI API compatibility. +By default the server binds only to `127.0.0.1`. Use `--host 0.0.0.0` only when you intentionally want to expose it beyond the local machine. + ## Starting the Server ### Simple Mode (Default) @@ -33,7 +35,7 @@ vllm-mlx serve mlx-community/Llama-3.2-3B-Instruct-4bit --port 8000 --continuous | Option | Description | Default | |--------|-------------|---------| | `--port` | Server port | 8000 | -| `--host` | Server host | 0.0.0.0 | +| `--host` | Server host | 127.0.0.1 | | `--api-key` | API key for authentication | None | | `--rate-limit` | Requests per minute per client (0 = disabled) | 0 | | `--timeout` | Request timeout in seconds | 300 | diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 85c8d9c35..09ca5249b 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -24,7 +24,7 @@ vllm-mlx serve [options] |--------|-------------|---------| | `--served-model-name` | Custom model name exposed through the OpenAI API. If not set, the model path is used as the name. | None | | `--port` | Server port | 8000 | -| `--host` | Server host | 0.0.0.0 | +| `--host` | Server host | 127.0.0.1 | | `--api-key` | API key for authentication | None | | `--rate-limit` | Requests per minute per client (0 = disabled) | 0 | | `--timeout` | Request timeout in seconds | 300 | diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index dcdff9d78..a8245f885 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -6,7 +6,7 @@ | Option | Description | Default | |--------|-------------|---------| -| `--host` | Server host address | `0.0.0.0` | +| `--host` | Server host address | `127.0.0.1` | | `--port` | Server port | `8000` | | `--max-tokens` | Default max tokens | `32768` | | `--default-temperature` | Default temperature when not specified in request | None | diff --git a/tests/test_server.py b/tests/test_server.py index b8d2c3fd3..a9b24f773 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -190,6 +190,23 @@ def test_trust_remote_code_flag_defaults_false(self): ) assert args.trust_remote_code is True + def test_host_defaults_to_localhost(self): + """Serve parsers should bind only to localhost unless overridden.""" + from vllm_mlx.cli import create_parser as create_cli_parser + from vllm_mlx.server import create_parser as create_server_parser + + cli_parser = create_cli_parser() + cli_args = cli_parser.parse_args( + ["serve", "mlx-community/Llama-3.2-3B-Instruct-4bit"] + ) + assert cli_args.host == "127.0.0.1" + + server_parser = create_server_parser() + server_args = server_parser.parse_args( + ["--model", "mlx-community/Llama-3.2-3B-Instruct-4bit"] + ) + assert server_args.host == "127.0.0.1" + def test_tool_call_parser_accepts_harmony_aliases(self): """GPT-OSS/Harmony parsers should be selectable from the serve CLI.""" from vllm_mlx.cli import create_parser diff --git a/vllm_mlx/cli.py b/vllm_mlx/cli.py index e2c939895..9c3b4495c 100644 --- a/vllm_mlx/cli.py +++ b/vllm_mlx/cli.py @@ -671,7 +671,10 @@ def create_parser() -> argparse.ArgumentParser: help="The model name used in the API. If not specified, the model argument is used.", ) serve_parser.add_argument( - "--host", type=str, default="0.0.0.0", help="Host to bind" + "--host", + type=str, + default="127.0.0.1", + help="Host to bind (default: localhost; use 0.0.0.0 to expose externally)", ) serve_parser.add_argument("--port", type=int, default=8000, help="Port to bind") serve_parser.add_argument( diff --git a/vllm_mlx/server.py b/vllm_mlx/server.py index 95c8c610e..7781d0a73 100644 --- a/vllm_mlx/server.py +++ b/vllm_mlx/server.py @@ -4335,8 +4335,8 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument( "--host", type=str, - default="0.0.0.0", - help="Host to bind to", + default="127.0.0.1", + help="Host to bind to (default: localhost; use 0.0.0.0 to expose externally)", ) parser.add_argument( "--port",