Add runtime flag so we can alternatively serve via vllm
Also added 'ls' alias for 'list'. Port logic is buggy, just support
--port for now. Pass 'args' to pull function, instead of "args.store".

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
ericcurtin committed Aug 29, 2024
1 parent 900cefe commit e70d918
Showing 2 changed files with 25 additions and 24 deletions.
ramalama/cli.py: 32 changes (15 additions & 17 deletions)
@@ -33,6 +33,10 @@ def init_cli():
     parser.add_argument("--dryrun",
                         action='store_true',
                         help="show container runtime command without executing it")
+    parser.add_argument("--runtime",
+                        default="llama.cpp",
+                        choices=["llama.cpp", "vllm"],
+                        help="specify the runtime to use (default: llama.cpp). Valid options are 'llama.cpp' and 'vllm'.")
     parser.add_argument("--nocontainer",
                         default=not use_container(),
                         action='store_true',
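
For reference, a minimal standalone sketch of how the new flag behaves under argparse; the prog name and bare parser are simplified stand-ins for init_cli(), and only the --runtime argument itself comes from this diff:

import argparse

# Simplified stand-in for the CLI's real parser setup.
parser = argparse.ArgumentParser(prog="ramalama")
parser.add_argument("--runtime",
                    default="llama.cpp",
                    choices=["llama.cpp", "vllm"],
                    help="specify the runtime to use (default: llama.cpp)")

print(parser.parse_args([]).runtime)                     # llama.cpp
print(parser.parse_args(["--runtime", "vllm"]).runtime)  # vllm
# Any value outside 'choices' exits with an "invalid choice" error.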
@@ -163,16 +167,20 @@ def list_files_by_modification():
                   reverse=True)
 
 
-def list_parser(subparsers):
-    parser = subparsers.add_parser(
-        'list', help='List all downloaded AI Models')
+def add_list_parser(subparsers, name, func):
+    parser = subparsers.add_parser(name, help='List all downloaded AI Models')
     parser.add_argument("-n", "--noheading", dest="noheading",
                         action='store_true',
                         help="do not display heading")
     parser.add_argument("--json", dest="json",
                         action='store_true',
                         help="print using json")
-    parser.set_defaults(func=list_cli)
+    parser.set_defaults(func=func)
 
 
+def list_parser(subparsers):
+    add_list_parser(subparsers, 'list', list_cli)
+    add_list_parser(subparsers, 'ls', list_cli)
+
+
 def list_cli(args):
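
As an aside, argparse can also register the alias in a single call via the aliases keyword of add_parser; a hypothetical alternative sketch, not what this commit does:

import argparse

parser = argparse.ArgumentParser(prog="ramalama")
subparsers = parser.add_subparsers()
lp = subparsers.add_parser("list", aliases=["ls"],
                           help="List all downloaded AI Models")
lp.set_defaults(func=lambda args: print("listing models"))

args = parser.parse_args(["ls"])  # 'ls' resolves to the 'list' subcommand
args.func(args)

The helper in the diff registers two independent subcommands sharing one handler; the aliases keyword would instead make 'ls' the same subcommand under a second name.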
@@ -247,15 +255,9 @@ def run_cli(args):
 
 
 def serve_parser(subparsers):
-    port = "8080"
-    host = os.getenv('RAMALAMA_HOST', port)
-    split = host.rsplit(':', 1)
-    if len(split) > 1:
-        port = split[1]
-
     parser = subparsers.add_parser(
-        'serve', help='Serve RESTAPI on specified AI Model')
-    parser.add_argument("--port", default=port,
+        'serve', help='Serve REST API on specified AI Model')
+    parser.add_argument("--port", default="8080",
                         help="port for AI Model server to listen on")
     parser.add_argument('model')  # positional argument
     parser.set_defaults(func=serve_cli)
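
The deleted environment parsing is what the commit message calls buggy: it seeded host with a port value, conflating the two whenever the variable was unset. A standalone reconstruction of the removed logic, to show the failure modes:

import os

port = "8080"
host = os.getenv("RAMALAMA_HOST", port)  # the 'host' default is actually a port
split = host.rsplit(":", 1)
if len(split) > 1:
    port = split[1]

# RAMALAMA_HOST unset           -> host == "8080" (a port, not an address)
# RAMALAMA_HOST="0.0.0.0"       -> port silently stays "8080"
# RAMALAMA_HOST="0.0.0.0:9090"  -> port becomes "9090"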
@@ -299,9 +301,6 @@ def run_container(args):
             wd = target
             break
 
-    port = "8080"
-    host = os.getenv('RAMALAMA_HOST', port)
-
     conman_args = [conman, "run",
                    "--rm",
                    "-it",
@@ -310,9 +309,8 @@ def run_container(args):
                    f"-v{home}:{home}",
                    "-v/tmp:/tmp",
                    f"-v{sys.argv[0]}:/usr/bin/ramalama:ro",
-                   "-e", "RAMALAMA_HOST",
                    "-e", "RAMALAMA_TRANSPORT",
-                   "-p", f"{host}:{port}",
+                   "-p", f"{args.port}:{args.port}",
                    f"-v{wd}:/usr/share/ramalama/ramalama:ro"]
     if os.path.exists("/dev/dri"):
         conman_args += ["--device", "/dev/dri"]
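
With the env-derived host:port pair gone, the container now publishes whatever --port was given, identically on the host and container side. A sketch of the resulting mapping, assuming podman as the resolved container manager and the default port:

args_port = "8080"  # value of --port; "podman" is an assumed value for conman
conman_args = ["podman", "run", "--rm", "-it",
               "-p", f"{args_port}:{args_port}"]  # host:container
print(" ".join(conman_args))  # podman run --rm -it -p 8080:8080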
ramalama/model.py: 17 changes (10 additions & 7 deletions)
@@ -31,14 +31,17 @@ def push(self, args):
             f"ramalama push for {self.type} not implemented")
 
     def run(self, args):
-        symlink_path = self.pull(args.store)
+        symlink_path = self.pull(args)
         exec_cmd(["llama-cli", "-m",
                   symlink_path, "--log-disable", "-cnv", "-p", "You are a helpful assistant"])
 
     def serve(self, args):
-        symlink_path = self.pull(args.store)
-
-        if args.port:
-            args.port = os.getenv("RAMALAMA_HOST")
-
-        exec_cmd(["llama-server", "--port", args.port, "-m", symlink_path])
+        symlink_path = self.pull(args)
+
+        # Check the runtime argument and execute the corresponding command
+        if args.runtime == "vllm":
+            exec_cmd(["vllm", "serve", "--port", args.port, symlink_path])
+        elif args.runtime == "llama.cpp":
+            exec_cmd(["llama-server", "--port", args.port, "-m", symlink_path])
+        else:
+            raise ValueError(f"Unsupported runtime: {args.runtime}")
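
A minimal standalone reproduction of the new dispatch in serve(), with exec_cmd stubbed out so the sketch runs on its own, and the model path purely hypothetical:

def exec_cmd(cmd):  # stub standing in for ramalama's exec_cmd helper
    print("would exec:", " ".join(cmd))

runtime = "vllm"                      # from the new --runtime flag
port = "8080"                         # from --port
symlink_path = "/path/to/model.gguf"  # hypothetical pulled-model path

if runtime == "vllm":
    exec_cmd(["vllm", "serve", "--port", port, symlink_path])
elif runtime == "llama.cpp":
    exec_cmd(["llama-server", "--port", port, "-m", symlink_path])
else:
    raise ValueError(f"Unsupported runtime: {runtime}")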
