diff --git a/tests/test_server.py b/tests/test_server.py
index ad8e0a9b9..c0450548d 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -168,6 +168,41 @@ def test_basic_completion_request(self):
         assert request.max_tokens is None  # uses _default_max_tokens when None
 
 
+class TestServeCli:
+    """Test serve CLI argument parsing."""
+
+    def test_tool_call_parser_accepts_harmony_aliases(self):
+        """GPT-OSS/Harmony parsers should be selectable from the serve CLI."""
+        from vllm_mlx.cli import create_parser
+
+        parser = create_parser()
+        args = parser.parse_args(
+            [
+                "serve",
+                "lmstudio-community/gpt-oss-20b-MLX-8bit",
+                "--enable-auto-tool-choice",
+                "--tool-call-parser",
+                "harmony",
+            ]
+        )
+
+        assert args.command == "serve"
+        assert args.tool_call_parser == "harmony"
+        assert args.enable_auto_tool_choice is True
+
+        args = parser.parse_args(
+            [
+                "serve",
+                "lmstudio-community/gpt-oss-20b-MLX-8bit",
+                "--enable-auto-tool-choice",
+                "--tool-call-parser",
+                "gpt-oss",
+            ]
+        )
+
+        assert args.tool_call_parser == "gpt-oss"
+
+
 # =============================================================================
 # Helper Function Tests
 # =============================================================================
diff --git a/vllm_mlx/cli.py b/vllm_mlx/cli.py
index bba5163d4..ee6d7cba5 100644
--- a/vllm_mlx/cli.py
+++ b/vllm_mlx/cli.py
@@ -633,7 +633,8 @@ def bench_kv_cache_command(args):
     )
 
 
-def main():
+def create_parser() -> argparse.ArgumentParser:
+    """Build the top-level CLI parser."""
     parser = argparse.ArgumentParser(
         description="vllm-mlx: Apple Silicon MLX backend for vLLM",
         formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -880,6 +881,8 @@ def main():
             "qwen3_coder",
             "llama",
             "hermes",
+            "harmony",
+            "gpt-oss",
             "deepseek",
             "kimi",
             "granite",
@@ -893,7 +896,8 @@ def main():
         help=(
            "Select the tool call parser for the model. Options: "
             "auto (auto-detect), mistral, qwen, qwen3_coder, llama, hermes, "
-            "deepseek, gemma4, kimi, granite, nemotron, xlam, functionary, glm47, minimax. "
+            "harmony, gpt-oss, deepseek, gemma4, kimi, granite, nemotron, "
+            "xlam, functionary, glm47, minimax. "
             "Required for --enable-auto-tool-choice."
         ),
     )
@@ -1113,6 +1117,12 @@ def main():
         action="store_true",
         help="Download as multimodal model (broader file patterns)",
     )
+
+    return parser
+
+
+def main():
+    parser = create_parser()
     args = parser.parse_args()
 
     if args.command == "serve":