
Commit 608bb9f

Add runtime flag so we can alternatively serve via vllm

Also added an 'ls' alias for 'list'. The port logic is buggy, so just support --port for now. Pass 'args' to the pull function instead of 'args.store'.

Signed-off-by: Eric Curtin <[email protected]>
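In practice, serving through vllm after this change would presumably look like `ramalama --runtime vllm serve --port 8080 MODEL` (the `--runtime` flag lives on the top-level parser while `--port` belongs to the `serve` subcommand; `MODEL` here stands in for whatever model name the user has pulled), and `ramalama ls` now works anywhere `ramalama list` did.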

2 files changed: +24 −22 lines changed

ramalama/cli.py (+15 −17)
```diff
@@ -33,6 +33,10 @@ def init_cli():
     parser.add_argument("--dryrun",
                         action='store_true',
                         help="show container runtime command without executing it")
+    parser.add_argument("--runtime",
+                        default="llama.cpp",
+                        choices=["llama.cpp", "vllm"],
+                        help="specify the runtime to use (default: llama.cpp). Valid options are 'llama.cpp' and 'vllm'.")
     parser.add_argument("--nocontainer",
                         default=not use_container(),
                         action='store_true',
@@ -163,16 +167,20 @@ def list_files_by_modification():
                   reverse=True)
 
 
-def list_parser(subparsers):
-    parser = subparsers.add_parser(
-        'list', help='List all downloaded AI Models')
+def add_list_parser(subparsers, name, func):
+    parser = subparsers.add_parser(name, help='List all downloaded AI Models')
     parser.add_argument("-n", "--noheading", dest="noheading",
                         action='store_true',
                         help="do not display heading")
     parser.add_argument("--json", dest="json",
                         action='store_true',
                         help="print using json")
-    parser.set_defaults(func=list_cli)
+    parser.set_defaults(func=func)
+
+
+def list_parser(subparsers):
+    add_list_parser(subparsers, 'list', list_cli)
+    add_list_parser(subparsers, 'ls', list_cli)
 
 
 def list_cli(args):
@@ -247,15 +255,9 @@ def run_cli(args):
 
 
 def serve_parser(subparsers):
-    port = "8080"
-    host = os.getenv('RAMALAMA_HOST', port)
-    split = host.rsplit(':', 1)
-    if len(split) > 1:
-        port = split[1]
-
     parser = subparsers.add_parser(
-        'serve', help='Serve RESTAPI on specified AI Model')
-    parser.add_argument("--port", default=port,
+        'serve', help='Serve REST API on specified AI Model')
+    parser.add_argument("--port", default="8080",
                         help="port for AI Model server to listen on")
     parser.add_argument('model') # positional argument
     parser.set_defaults(func=serve_cli)
@@ -299,9 +301,6 @@ def run_container(args):
             wd = target
             break
 
-    port = "8080"
-    host = os.getenv('RAMALAMA_HOST', port)
-
     conman_args = [conman, "run",
                    "--rm",
                    "-it",
@@ -310,9 +309,8 @@
                    f"-v{home}:{home}",
                    "-v/tmp:/tmp",
                    f"-v{sys.argv[0]}:/usr/bin/ramalama:ro",
-                   "-e", "RAMALAMA_HOST",
                    "-e", "RAMALAMA_TRANSPORT",
-                   "-p", f"{host}:{port}",
+                   "-p", f"{args.port}:{args.port}",
                    f"-v{wd}:/usr/share/ramalama/ramalama:ro"]
     if os.path.exists("/dev/dri"):
         conman_args += ["--device", "/dev/dri"]
```
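Two argparse behaviors do the heavy lifting in this file: `choices=` makes argparse itself reject any `--runtime` value other than the two listed, and registering the same parser setup under two subcommand names is what turns 'ls' into an alias for 'list'. A minimal, self-contained sketch of both patterns (illustrative only, not the actual ramalama module):

```python
import argparse

def add_list_parser(subparsers, name, func):
    # Registering identical arguments/help under each name yields an alias.
    parser = subparsers.add_parser(name, help='List all downloaded AI Models')
    parser.set_defaults(func=func)

def list_cli(args):
    print("would list models here")

parser = argparse.ArgumentParser(prog="ramalama")
parser.add_argument("--runtime", default="llama.cpp",
                    choices=["llama.cpp", "vllm"])  # argparse rejects anything else
subparsers = parser.add_subparsers()
add_list_parser(subparsers, 'list', list_cli)
add_list_parser(subparsers, 'ls', list_cli)

args = parser.parse_args(["--runtime", "vllm", "ls"])
args.func(args)  # dispatches to list_cli
# parser.parse_args(["--runtime", "ollama"]) would exit with "invalid choice"
```

argparse's `add_parser` also accepts an `aliases=` keyword, which would be an alternative way to get the same 'ls' alias without a helper function.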

ramalama/model.py (+9 −5)
```diff
@@ -31,14 +31,18 @@ def push(self, args):
             f"ramalama push for {self.type} not implemented")
 
     def run(self, args):
-        symlink_path = self.pull(args.store)
+        symlink_path = self.pull(args)
         exec_cmd(["llama-cli", "-m",
                   symlink_path, "--log-disable", "-cnv", "-p", "You are a helpful assistant"])
 
     def serve(self, args):
-        symlink_path = self.pull(args.store)
+        symlink_path = self.pull(args)
 
-        if args.port:
-            args.port = os.getenv("RAMALAMA_HOST")
+        # Check the runtime argument and execute the corresponding command
+        if args.runtime == "vllm":
+            exec_cmd(["vllm", "serve", "--port", args.port, symlink_path])
+        elif args.runtime == "llama.cpp":
+            exec_cmd(["llama-server", "--port", args.port, "-m", symlink_path])
+        else:
+            raise ValueError(f"Unsupported runtime: {args.runtime}")
 
-        exec_cmd(["llama-server", "--port", args.port, "-m", symlink_path])
```
