Add basic vllm support #97

Merged 1 commit on Sep 23, 2024
3 changes: 3 additions & 0 deletions docs/ramalama.1.md
@@ -77,6 +77,9 @@ show this help message and exit
 do not run ramalama in the default container (default: False)
 use environment variable "RAMALAMA_IN_CONTAINER=false" to change default.
 
+#### **--runtime**
+specify the runtime to use, valid options are 'llama.cpp' and 'vllm' (default: llama.cpp)
+
 #### **--store**=STORE
 store AI Models in the specified directory (default rootless: `$HOME/.local/share/ramalama`, default rootful: `/var/lib/ramalama`)

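With this flag in place the runtime is chosen on the top-level command line, for example `ramalama --runtime vllm serve MODEL`, where MODEL stands for whatever model name the store resolves.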
8 changes: 7 additions & 1 deletion ramalama/cli.py
@@ -38,6 +38,12 @@ def init_cli():
     )
     parser.add_argument("--store", default=get_store(), help="store AI Models in the specified directory")
     parser.add_argument("--dryrun", action="store_true", help="show container runtime command without executing it")
+    parser.add_argument(
+        "--runtime",
+        default="llama.cpp",
+        choices=["llama.cpp", "vllm"],
+        help="specify the runtime to use, valid options are 'llama.cpp' and 'vllm'",
+    )
     parser.add_argument(
         "--nocontainer",
         default=not use_container(),
@@ -316,7 +322,7 @@ def run_parser(subparsers):
     parser.add_argument("--prompt", dest="prompt", action="store_true", help="modify chatbot prompt")
     parser.add_argument("-n", "--name", dest="name", help="name of container in which the Model will be run")
     parser.add_argument("MODEL")  # positional argument
-    parser.add_argument("ARGS", nargs="*", help="Additional options to pass to the AI Model")
+    parser.add_argument("ARGS", nargs="*", help="additional options to pass to the AI Model")
     parser.set_defaults(func=run_cli)


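Because the new flag uses argparse `choices`, an unsupported runtime is rejected before any subcommand runs. A minimal standalone sketch of that behavior (just the new flag on a bare parser, not the actual ramalama wiring):

```python
import argparse

# Minimal sketch: a parser carrying only the flag added to init_cli() above.
parser = argparse.ArgumentParser(prog="ramalama")
parser.add_argument(
    "--runtime",
    default="llama.cpp",
    choices=["llama.cpp", "vllm"],
    help="specify the runtime to use, valid options are 'llama.cpp' and 'vllm'",
)

print(parser.parse_args([]).runtime)                     # llama.cpp (default)
print(parser.parse_args(["--runtime", "vllm"]).runtime)  # vllm
# parser.parse_args(["--runtime", "mlx"]) would exit with:
#   ramalama: error: argument --runtime: invalid choice: 'mlx'
#   (choose from 'llama.cpp', 'vllm')
```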
5 changes: 4 additions & 1 deletion ramalama/model.py
@@ -102,5 +102,8 @@ def run(self, args):
 
     def serve(self, args):
         symlink_path = self.pull(args)
-        exec_args = ["llama-server", "--port", args.port, "-m", symlink_path] + self.common_params
+        exec_args = ["llama-server", "--port", args.port, "-m", symlink_path]
+        if args.runtime == "vllm":
+            exec_args = ["vllm", "serve", "--port", args.port, symlink_path]
+
         exec_cmd(exec_args)
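The dispatch added to serve() amounts to picking one of two argv vectors before exec_cmd() takes over. A self-contained sketch, using a hypothetical helper name and illustrative port and path values:

```python
# Hypothetical helper mirroring the logic added to serve();
# the port and model path below are illustrative only.
def build_serve_args(runtime, port, symlink_path):
    # llama.cpp stays the default: serve the model over HTTP via llama-server.
    exec_args = ["llama-server", "--port", port, "-m", symlink_path]
    if runtime == "vllm":
        # vllm takes the model path as a positional argument to `serve`.
        exec_args = ["vllm", "serve", "--port", port, symlink_path]
    return exec_args

print(build_serve_args("llama.cpp", "8080", "/path/to/model.gguf"))
# ['llama-server', '--port', '8080', '-m', '/path/to/model.gguf']
print(build_serve_args("vllm", "8080", "/path/to/model.gguf"))
# ['vllm', 'serve', '--port', '8080', '/path/to/model.gguf']
```

Assigning the llama.cpp command first and overriding it for vllm keeps the default path untouched, so additional runtimes can later be added as further branches.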