ramalama/cli.py (30 changes: 24 additions & 6 deletions)

@@ -838,8 +838,17 @@ def run_parser(subparsers):


 def run_cli(args):
-    model = New(args.MODEL, args)
-    model.run(args)
+    try:
+        model = New(args.MODEL, args)
+        model.run(args)
+
+    except KeyError as e:
+        try:
+            args.quiet = True
+            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
+            model.run(args)
+        except Exception:
+            raise e
Reviewer comment (Contributor) on lines +850 to +851:

suggestion (bug_risk): Re-raising the original KeyError in the fallback may mask the actual OCI error.

If the OCI branch fails, its underlying error will be lost. Consider chaining the exceptions or re-raising the OCI error to better surface the true cause.

Suggested change:
-        except Exception:
-            raise e
+        except Exception as oci_error:
+            raise oci_error from e
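For context on the suggested `from` chaining: Python stores the original exception on `__cause__`, so the traceback shows both failures instead of only the stale KeyError. A minimal, self-contained sketch of the difference (the helper names are illustrative, not ramalama code):

```python
# Standalone illustration of the reviewer's point, not ramalama code.
def lookup(model):
    raise KeyError(model)  # stands in for New() failing

def oci_fallback(model):
    raise RuntimeError("podman pull failed")  # stands in for the OCI branch failing

try:
    lookup("tiny")
except KeyError as e:
    try:
        oci_fallback("tiny")
    except Exception as oci_error:
        # 'raise oci_error from e' surfaces the OCI failure as the primary
        # error while keeping the KeyError visible as its direct cause;
        # 'raise e' would discard the OCI traceback entirely.
        raise oci_error from e
```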


 def serve_parser(subparsers):
@@ -863,8 +872,17 @@ def serve_parser(subparsers):
 def serve_cli(args):
     if not args.container:
         args.detach = False
-    model = New(args.MODEL, args)
-    model.serve(args)
+
+    try:
+        model = New(args.MODEL, args)
+        model.serve(args)
+    except KeyError as e:
+        try:
+            args.quiet = True
+            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
+            model.serve(args)
+        except Exception:
+            raise e
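Since `run_cli` and `serve_cli` now duplicate the same New-then-OCI fallback, one possible follow-up (not part of this PR) is a shared helper; a sketch, assuming the `New` and `OCI` imports already present in cli.py and folding in the review suggestion above:

```python
# Hypothetical helper for ramalama/cli.py; 'action' is "run" or "serve".
def _run_or_serve(args, action):
    try:
        model = New(args.MODEL, args)
        getattr(model, action)(args)
    except KeyError as e:
        try:
            args.quiet = True
            model = OCI(args.MODEL, args.engine, ignore_stderr=True)
            getattr(model, action)(args)
        except Exception as oci_error:
            raise oci_error from e  # chain, per the review suggestion above
```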


 def stop_parser(subparsers):
@@ -969,8 +987,8 @@ def _rm_model(models, args):
             raise e
         try:
             # attempt to remove as a container image
-            m = OCI(model, args.engine)
-            m.remove(args, ignore_stderr=True)
+            m = OCI(model, args.engine, ignore_stderr=True)
+            m.remove(args)
             return
         except Exception:
             pass
ramalama/kube.py (5 changes: 2 additions & 3 deletions)

@@ -5,7 +5,7 @@


 class Kube:
-    def __init__(self, model, args, exec_args):
+    def __init__(self, model, image, args, exec_args):
         self.ai_image = model
         if hasattr(args, "MODEL"):
             self.ai_image = args.MODEL
@@ -18,8 +18,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args

self.image = args.image
self.image = image

def gen_volumes(self):
mounts = f"""\
Expand Down
ramalama/model.py (9 changes: 5 additions & 4 deletions)

@@ -485,6 +485,7 @@ def handle_runtime(self, args, exec_args, exec_model_path):
         return exec_args
 
     def generate_container_config(self, model_path, args, exec_args):
+        self.image = self._image(args)
         if args.generate == "quadlet":
             self.quadlet(model_path, args, exec_args)
         elif args.generate == "kube":
@@ -526,17 +527,17 @@ def serve(self, args):
         self.execute_command(model_path, exec_args, args)
 
     def quadlet(self, model, args, exec_args):
-        quadlet = Quadlet(model, args, exec_args)
+        quadlet = Quadlet(model, self.image, args, exec_args)
         quadlet.generate()
 
     def quadlet_kube(self, model, args, exec_args):
-        kube = Kube(model, args, exec_args)
+        kube = Kube(model, self.image, args, exec_args)
         kube.generate()
-        quadlet = Quadlet(model, args, exec_args)
+        quadlet = Quadlet(model, self.image, args, exec_args)
         quadlet.kube()
 
     def kube(self, model, args, exec_args):
-        kube = Kube(model, args, exec_args)
+        kube = Kube(model, self.image, args, exec_args)
         kube.generate()
 
     def path(self, args):
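The net effect of the model.py, kube.py, and quadlet.py changes is that the container image is resolved once and injected, rather than each generator reading `args.image` or calling `default_image()` itself. A condensed sketch of the new flow (`_image()` is referenced in the hunk above but its body is outside this diff):

```python
# Condensed from the hunks above; not a literal copy of model.py.
def generate_container_config(self, model_path, args, exec_args):
    self.image = self._image(args)  # resolve the image exactly once
    if args.generate == "quadlet":
        # Quadlet no longer calls default_image(); it renders Image={self.image}
        Quadlet(model_path, self.image, args, exec_args).generate()
    elif args.generate == "kube":
        # Kube no longer reads args.image; the caller decides which image to use
        Kube(model_path, self.image, args, exec_args).generate()
```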
ramalama/oci.py (14 changes: 8 additions & 6 deletions)

@@ -116,13 +116,14 @@ def list_models(args):


 class OCI(Model):
-    def __init__(self, model, conman):
+    def __init__(self, model, conman, ignore_stderr=False):
         super().__init__(model.removeprefix(prefix).removeprefix("docker://"))
         for t in MODEL_TYPES:
             if self.model.startswith(t + "://"):
                 raise ValueError(f"{model} invalid: Only OCI Model types supported")
         self.type = "OCI"
         self.conman = conman
+        self.ignore_stderr = ignore_stderr
 
     def login(self, args):
         conman_args = [self.conman, "login"]
@@ -298,7 +299,8 @@ def push(self, source, args):
             raise e
 
     def pull(self, args):
-        print(f"Downloading {self.model}...")
+        if not args.quiet:
+            print(f"Downloading {self.model}...")
         if not args.engine:
             raise NotImplementedError("OCI images require a container engine like Podman or Docker")
 
@@ -310,7 +312,7 @@
         if args.authfile:
             conman_args.extend([f"--authfile={args.authfile}"])
         conman_args.extend([self.model])
-        run_cmd(conman_args, debug=args.debug)
+        run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
         return MNT_FILE
 
     def _registry_reference(self):
@@ -346,10 +348,10 @@ def remove(self, args, ignore_stderr=False):

         try:
             conman_args = [self.conman, "manifest", "rm", self.model]
-            run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
+            run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
         except subprocess.CalledProcessError:
             conman_args = [self.conman, "rmi", f"--force={args.ignore}", self.model]
-            run_cmd(conman_args, debug=args.debug, ignore_stderr=ignore_stderr)
+            run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
 
     def exists(self, args):
         try:
@@ -364,7 +366,7 @@ def exists(self, args):
 
         conman_args = [self.conman, "image", "inspect", self.model]
         try:
-            run_cmd(conman_args, debug=args.debug)
+            run_cmd(conman_args, debug=args.debug, ignore_stderr=self.ignore_stderr)
             return self.model
         except Exception:
             return None
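Moving `ignore_stderr` from the individual `remove()` call into the constructor means every `run_cmd` the OCI object issues inherits the same stderr policy, which is what lets `run_cli`/`serve_cli` probe for an OCI image quietly before giving up. `run_cmd` lives in ramalama.common and its body is not shown in this diff; an illustrative stand-in with the same keyword interface:

```python
import subprocess

# Illustrative stand-in for ramalama.common.run_cmd (actual body not in this diff).
def run_cmd(args, debug=False, ignore_stderr=False):
    if debug:
        print("+", " ".join(args))
    # ignore_stderr routes the child's stderr to /dev/null, so speculative
    # probes (e.g. "podman image inspect" on a model that may not exist)
    # fail silently instead of printing engine errors.
    stderr = subprocess.DEVNULL if ignore_stderr else None
    return subprocess.run(args, stdout=subprocess.PIPE, stderr=stderr, check=True)
```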
ramalama/quadlet.py (7 changes: 4 additions & 3 deletions)

@@ -1,10 +1,10 @@
 import os
 
-from ramalama.common import MNT_DIR, MNT_FILE, default_image, get_env_vars
+from ramalama.common import MNT_DIR, MNT_FILE, get_env_vars
 
 
 class Quadlet:
-    def __init__(self, model, args, exec_args):
+    def __init__(self, model, image, args, exec_args):
         self.ai_image = model
         if hasattr(args, "MODEL"):
             self.ai_image = args.MODEL
@@ -17,6 +17,7 @@ def __init__(self, model, args, exec_args):
self.model = model.removeprefix("oci://")
self.args = args
self.exec_args = exec_args
self.image = image

def kube(self):
outfile = self.name + ".kube"
Expand Down Expand Up @@ -64,7 +65,7 @@ def generate(self):
AddDevice=-/dev/dri
AddDevice=-/dev/kfd
Exec={" ".join(self.exec_args)}
Image={default_image()}
Image={self.image}
{env_var_string}
{volume}
{name_string}
Expand Down
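With `image` injected, the rendered quadlet unit now reflects whatever the caller resolved (for example a GPU-specific runtime image) instead of a hard-coded `default_image()`. A toy rendering of just the changed template lines, with illustrative values:

```python
# Toy rendering of the changed template lines; values are illustrative.
exec_args = ["llama-server", "--port", "1234"]
image = "quay.io/ramalama/ramalama:latest"  # whatever Model._image(args) resolved
print(f'Exec={" ".join(exec_args)}\nImage={image}')
# -> Exec=llama-server --port 1234
# -> Image=quay.io/ramalama/ramalama:latest
```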
test/system/040-serve.bats (6 changes: 3 additions & 3 deletions)

@@ -239,11 +239,11 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*command: \[\"--port\"\]" "command is correct"
is "$output" ".*args: \['1234', '--model', '/mnt/models/model.file', '--max_model_len', '2048'\]" "args are correct"

is "$output" ".*image: quay.io/ramalama/ramalama" "image is correct"
is "$output" ".*reference: ${ociimage}" "AI image should be created"
is "$output" ".*pullPolicy: IfNotPresent" "pullPolicy should exist"

run_ramalama rm oci://${ociimage}
rm $name.yaml
done
stop_registry
}
Expand All @@ -257,7 +257,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating Kubernetes YAML file: ${name}.yaml" "generate .yaml file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand All @@ -274,7 +273,6 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"
is "$output" ".*Generating quadlet file: ${name}.kube" "generate .kube file"

run cat $name.yaml
is "$output" ".*image: quay.io/ramalama/ramalama" "Should container image"
is "$output" ".*command: \[\"llama-server\"\]" "Should command"
is "$output" ".*containerPort: 1234" "Should container container port"

Expand All @@ -288,6 +286,8 @@ verify_begin=".*run --rm -i --label ai.ramalama --name"

     run cat $name.kube
     is "$output" ".*Yaml=$name.yaml" "Should container container port"
+    rm $name.kube
+    rm $name.yaml
 }
 
 # vim: filetype=sh
test/system/055-convert.bats (24 changes: 17 additions & 7 deletions)

@@ -37,20 +37,30 @@ load helpers

@test "ramalama convert tiny to image" {
skip_if_darwin
skip_if_docker
run_ramalama pull tiny
run_ramalama convert tiny oci://ramalama/tiny
run_ramalama convert tiny oci://quay.io/ramalama/tiny
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
# FIXME: This test will work on all podman 5.3 and greater clients.
# right now Ubuntu test suite is stuck on podman 5.0.3 Ubuntu 24.10 support
# it bug github is stuck on 24.04. Should change when 25.04 is released
# if is_container and not_docker; then
# cname=c_$(safename)
# run_podman version
# run_ramalama serve -n ${cname} -d quay.io/ramalama/tiny
# run_ramalama stop ${cname}
# fi
run_ramalama rm quay.io/ramalama/tiny
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*quay.io/ramalama/tiny" "image was removed"

run_ramalama convert ollama://tinyllama oci://ramalama/tiny
run_ramalama convert ollama://tinyllama oci://quay.io/ramalama/tinyllama
run_ramalama list
is "$output" ".*ramalama/tiny:latest"
run_ramalama rm ramalama/tiny
is "$output" ".*quay.io/ramalama/tinyllama:latest"
run_ramalama rm quay.io/ramalama/tinyllama
run_ramalama list
assert "$output" !~ ".*ramalama/tiny" "image was removed"
assert "$output" !~ ".*ramalama/tinyllama" "image was removed"

podman image prune --force
}