diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index d15cf791..f57d99c4 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -77,6 +77,9 @@ show this help message and exit
 do not run ramalama in the default container (default: False)
 use environment variable "RAMALAMA_IN_CONTAINER=false" to change default.
 
+#### **--runtime**
+specify the runtime to use, valid options are 'llama.cpp' and 'vllm' (default: llama.cpp)
+
 #### **--store**=STORE
 store AI Models in the specified directory (default rootless: `$HOME/.local/share/ramalama`, default rootful: `/var/lib/ramalama`)
 
diff --git a/ramalama/cli.py b/ramalama/cli.py
index 0732ba30..264cb2c7 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -38,6 +38,12 @@ def init_cli():
     )
     parser.add_argument("--store", default=get_store(), help="store AI Models in the specified directory")
     parser.add_argument("--dryrun", action="store_true", help="show container runtime command without executing it")
+    parser.add_argument(
+        "--runtime",
+        default="llama.cpp",
+        choices=["llama.cpp", "vllm"],
+        help="specify the runtime to use, valid options are 'llama.cpp' and 'vllm'",
+    )
     parser.add_argument(
         "--nocontainer",
         default=not use_container(),
@@ -316,7 +322,7 @@ def run_parser(subparsers):
     parser.add_argument("--prompt", dest="prompt", action="store_true", help="modify chatbot prompt")
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
     parser.add_argument("MODEL")  # positional argument
-    parser.add_argument("ARGS", nargs="*", help="Additional options to pass to the AI Model")
+    parser.add_argument("ARGS", nargs="*", help="additional options to pass to the AI Model")
     parser.set_defaults(func=run_cli)
 
 
diff --git a/ramalama/model.py b/ramalama/model.py
index 92a2d228..e94d40c4 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -102,5 +102,8 @@ def run(self, args):
 
     def serve(self, args):
         symlink_path = self.pull(args)
-        exec_args = ["llama-server", "--port", args.port, "-m", symlink_path] + self.common_params
+        exec_args = ["llama-server", "--port", args.port, "-m", symlink_path]
+        if args.runtime == "vllm":
+            exec_args = ["vllm", "serve", "--port", args.port, symlink_path]
+
         exec_cmd(exec_args)
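
For reviewers, here is a minimal, self-contained sketch of the dispatch pattern this change introduces: a top-level `--runtime` flag whose value selects the serving command. `build_exec_args` is a hypothetical helper used only for illustration; in ramalama itself the command list is built inline in `Model.serve()`, and `args.port` arrives as a string.

```python
import argparse


def build_exec_args(runtime, port, model_path):
    # Default to llama.cpp's llama-server; swap in the vllm command
    # wholesale when the vllm runtime is selected, as Model.serve() does.
    exec_args = ["llama-server", "--port", port, "-m", model_path]
    if runtime == "vllm":
        exec_args = ["vllm", "serve", "--port", port, model_path]
    return exec_args


parser = argparse.ArgumentParser()
parser.add_argument(
    "--runtime",
    default="llama.cpp",
    choices=["llama.cpp", "vllm"],  # argparse rejects any other value
    help="specify the runtime to use, valid options are 'llama.cpp' and 'vllm'",
)

args = parser.parse_args(["--runtime", "vllm"])
print(build_exec_args(args.runtime, "8080", "/path/to/model.gguf"))
# ['vllm', 'serve', '--port', '8080', '/path/to/model.gguf']
```

Because the flag is registered on the top-level parser in `init_cli()`, it precedes the subcommand on the command line, e.g. `ramalama --runtime vllm serve MODEL`.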