Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tests/test_harmony_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,23 @@ def test_invalid_parser_not_registered(self):
ToolParserManager.get_tool_parser("nonexistent_parser")


class TestServeLogLevelFlags:
    """Check that both server entry points declare the ``--log-level`` flag.

    The checks are textual: each test obtains the source code of an entry
    point and asserts that the expected argparse fragments appear in it.
    """

    # Exact source fragments that must appear in each entry point.
    _EXPECTED_SNIPPETS = (
        '"--log-level"',
        'choices=["DEBUG", "INFO", "WARNING", "ERROR"]',
    )

    def _assert_log_level_flag(self, source):
        """Assert that every expected fragment occurs in *source*."""
        for snippet in self._EXPECTED_SNIPPETS:
            assert snippet in source, f"missing {snippet!r} in entry point source"

    def test_cli_serve_has_log_level_flag(self):
        import importlib
        import inspect

        source = inspect.getsource(importlib.import_module("vllm_mlx.cli").main)
        self._assert_log_level_flag(source)

    def test_module_server_has_log_level_flag(self):
        from pathlib import Path

        # Anchor the path to this test file rather than the current working
        # directory, so the test passes no matter where pytest is invoked.
        # Assumes the repository layout tests/<this file> next to vllm_mlx/.
        repo_root = Path(__file__).resolve().parent.parent
        server_path = repo_root / "vllm_mlx" / "server.py"

        # The file is read as text instead of imported; pin the encoding so
        # the result does not depend on the platform's locale default.
        source = server_path.read_text(encoding="utf-8")
        self._assert_log_level_flag(source)


# ============================================================================
# SUPPORTS_NATIVE_TOOL_FORMAT Tests
# ============================================================================
Expand Down
10 changes: 9 additions & 1 deletion vllm_mlx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def serve_command(args):
from .server import RateLimiter, app, load_model

logger = logging.getLogger(__name__)
uvicorn_log_level = server.configure_logging(args.log_level)

# Validate tool calling arguments
if args.enable_auto_tool_choice and not args.tool_call_parser:
Expand Down Expand Up @@ -365,7 +366,7 @@ def serve_command(args):
app,
host=args.host,
port=args.port,
log_level="info",
log_level=uvicorn_log_level,
timeout_keep_alive=30,
)

Expand Down Expand Up @@ -798,6 +799,13 @@ def main():
"--host", type=str, default="0.0.0.0", help="Host to bind"
)
serve_parser.add_argument("--port", type=int, default=8000, help="Port to bind")
serve_parser.add_argument(
"--log-level",
type=str,
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
default="INFO",
help="Log level for Python logging and uvicorn",
)
serve_parser.add_argument(
"--max-num-seqs", type=int, default=256, help="Max concurrent sequences"
)
Expand Down
21 changes: 20 additions & 1 deletion vllm_mlx/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@
# Install the default root logging configuration at INFO when this module is
# imported; configure_logging() can change the level afterwards.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def normalize_log_level(log_level: str) -> str:
    """Return *log_level* converted to upper case.

    ``configure_logging`` uses the result to look up the matching level
    constant on the ``logging`` module.
    """
    upper_cased = log_level.upper()
    return upper_cased


def configure_logging(log_level: str) -> str:
    """Apply *log_level* to Python logging and return the name for uvicorn.

    The level is set on both the root logger and this module's ``logger``.

    Args:
        log_level: Level name in any letter case, e.g. ``"debug"`` or
            ``"WARNING"``.

    Returns:
        The lower-cased canonical name of the level that was actually
        applied (e.g. ``"info"``), intended to be passed to uvicorn as its
        ``log_level``.

    Unknown names fall back to INFO. The returned name always matches the
    level that was set, so Python logging and uvicorn cannot disagree.
    """
    normalized = normalize_log_level(log_level)
    level = getattr(logging, normalized, None)

    # getattr() can resolve attributes that are not usable levels (for
    # example the BASIC_FORMAT string, or NOTSET); accept only positive
    # integer level constants and fall back to INFO for anything else.
    if not isinstance(level, int) or level <= logging.NOTSET:
        level = logging.INFO

    logging.getLogger().setLevel(level)
    logger.setLevel(level)

    # Derive the name from the applied level so aliases are canonicalised
    # (WARN -> "warning", FATAL -> "critical") and invalid input yields
    # "info" instead of echoing the unknown name back to the caller.
    return logging.getLevelName(level).lower()

# Global engine instance
_engine: BaseEngine | None = None
_model_name: str | None = None
Expand Down Expand Up @@ -3255,6 +3266,13 @@ def main():
default=8000,
help="Port to bind to",
)
parser.add_argument(
"--log-level",
type=str,
choices=["DEBUG", "INFO", "WARNING", "ERROR"],
default="INFO",
help="Log level for Python logging and uvicorn",
)
parser.add_argument(
"--mllm",
action="store_true",
Expand Down Expand Up @@ -3413,6 +3431,7 @@ def main():
)

args = parser.parse_args()
uvicorn_log_level = configure_logging(args.log_level)

# Set global configuration
global _api_key, _default_timeout, _rate_limiter
Expand Down Expand Up @@ -3509,7 +3528,7 @@ def main():
)

# Start server
uvicorn.run(app, host=args.host, port=args.port)
uvicorn.run(app, host=args.host, port=args.port, log_level=uvicorn_log_level)


if __name__ == "__main__":
Expand Down