3 changes: 2 additions & 1 deletion .gitignore
@@ -13,4 +13,5 @@ venv/
*.container
*.image
*.volume
__pycache__/
__pycache__/
.aider*
Member Author

I actually used this aider tool to rewrite a lot of the boilerplate associated with introducing a new command.

28 changes: 28 additions & 0 deletions docs/ramalama-client.1.md
@@ -0,0 +1,28 @@
% ramalama-client 1

## NAME
ramalama\-client - interact with the AI Model server (experimental)

## SYNOPSIS
**ramalama client** [*options*] _host_ [_args_...]

## OPTIONS

#### **--help**, **-h**
show this help message and exit

## DESCRIPTION
Interact with an AI Model server. The client sends queries to the AI Model server and retrieves responses. Any trailing _args_ override the default prompt, and the output is returned without entering the interactive chatbot.

## EXAMPLES

### Connect to the AI Model server.
```
$ ramalama client http://127.0.0.1:8080
```
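
### Send a one-shot query.
Trailing arguments override the default prompt and the output is returned without entering the chatbot; the prompt below is only an example.
```
$ ramalama client http://127.0.0.1:8080 "Write a haiku about llamas"
```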

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**, **[ramalama-serve(1)](ramalama-serve.1.md)**

## HISTORY
Apr 2025, Originally compiled by Eric Curtin <[email protected]>
1 change: 1 addition & 0 deletions docs/ramalama.1.md
@@ -175,6 +175,7 @@ It adds support for model versioning and multiple files such as chat templates.
| Command | Description |
| ------------------------------------------------- | ---------------------------------------------------------- |
| [ramalama-bench(1)](ramalama-bench.1.md) | benchmark specified AI Model |
| [ramalama-client(1)](ramalama-client.1.md) | interact with the AI Model server (experimental) |
| [ramalama-containers(1)](ramalama-containers.1.md)| list all RamaLama containers |
| [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image |
| [ramalama-info(1)](ramalama-info.1.md) | display RamaLama configuration information |
20 changes: 19 additions & 1 deletion ramalama/cli.py
@@ -9,7 +9,7 @@

import ramalama.oci
import ramalama.rag
from ramalama.common import accel_image, get_accel, perror, run_cmd
from ramalama.common import accel_image, exec_cmd, get_accel, get_cmd_with_wrapper, perror, run_cmd
from ramalama.config import CONFIG
from ramalama.model import MODEL_TYPES
from ramalama.model_factory import ModelFactory
@@ -162,6 +162,7 @@ def configure_subcommands(parser):
    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = False
    bench_parser(subparsers)
    client_parser(subparsers)
    containers_parser(subparsers)
    convert_parser(subparsers)
    help_parser(subparsers)
@@ -924,6 +925,16 @@ def version_parser(subparsers):
    parser.set_defaults(func=print_version)


def client_parser(subparsers):
    """Add parser for client command"""
    parser = subparsers.add_parser("client", help="interact with an OpenAI endpoint")
    parser.add_argument("HOST", help="host to connect to")  # positional argument
    parser.add_argument(
        "ARGS", nargs="*", help="overrides the default prompt, and the output is returned without entering the chatbot"
    )
    parser.set_defaults(func=client_cli)

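If it helps to see the parsing behavior, here is a standalone sketch of just this subparser (the prompt text is hypothetical; the real parser is registered via `configure_subcommands` above):

```python
import argparse

# Standalone reconstruction of the client subparser, for illustration only.
parser = argparse.ArgumentParser(prog="ramalama")
subparsers = parser.add_subparsers(dest="subcommand")
client = subparsers.add_parser("client", help="interact with an OpenAI endpoint")
client.add_argument("HOST", help="host to connect to")
client.add_argument("ARGS", nargs="*", help="overrides the default prompt")

args = parser.parse_args(["client", "http://127.0.0.1:8080", "Hello"])
print(args.HOST, args.ARGS)  # -> http://127.0.0.1:8080 ['Hello']
```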

def rag_parser(subparsers):
    parser = subparsers.add_parser(
        "rag",
@@ -997,6 +1008,13 @@ def New(model, args, transport=CONFIG["transport"]):
    return ModelFactory(model, args, transport=transport).create()


def client_cli(args):
    """Handle client command execution"""
    client_args = ["ramalama-client-core", "-c", "2048", "--temp", "0.8", args.HOST] + args.ARGS
    client_args[0] = get_cmd_with_wrapper(client_args)
    exec_cmd(client_args)

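For a concrete picture of the dispatch, a minimal sketch of what `client_cli` builds and execs, assuming a wrapper is installed; the host and prompt are examples, and `-c`/`--temp` are the fixed defaults shown above (presumably context size and sampling temperature for `ramalama-client-core`):

```python
from ramalama.common import exec_cmd, get_cmd_with_wrapper

# Roughly equivalent to `ramalama client http://127.0.0.1:8080 "Write a haiku"`.
client_args = ["ramalama-client-core", "-c", "2048", "--temp", "0.8",
               "http://127.0.0.1:8080", "Write a haiku"]
# Resolve argv[0] to an absolute wrapper path,
# e.g. "/usr/libexec/ramalama/ramalama-client-core" on a package install.
client_args[0] = get_cmd_with_wrapper(client_args)
exec_cmd(client_args)  # replaces the current process with the client
```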

def perplexity_parser(subparsers):
    parser = subparsers.add_parser("perplexity", help="calculate perplexity for specified AI Model")
    run_serve_perplexity_args(parser)
8 changes: 8 additions & 0 deletions ramalama/common.py
@@ -496,6 +496,14 @@ def tagged_image(image):
    return f"{image}:{minor_release()}"


def get_cmd_with_wrapper(cmd_args):
    for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
        if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
            return f"{dir}libexec/ramalama/{cmd_args[0]}"

    return ""

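A quick way to see the search order this implements (the helper name is an example; the empty prefix makes the first candidate relative to the current directory, which matters when running from a source tree):

```python
import os

# Probe the same prefixes get_cmd_with_wrapper tries, in order; the first
# hit wins, and "" is returned when no wrapper directory has the helper.
helper = "ramalama-client-core"  # example helper name
for prefix in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
    candidate = f"{prefix}libexec/ramalama/{helper}"
    print(candidate, "->", "found" if os.path.exists(candidate) else "missing")
```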

def accel_image(config, args):
    if args and len(args.image.split(":")) > 1:
        return args.image
10 changes: 2 additions & 8 deletions ramalama/model.py
@@ -15,6 +15,7 @@
    exec_cmd,
    genname,
    get_accel_env_vars,
    get_cmd_with_wrapper,
    run_cmd,
    set_accel_env_vars,
)
@@ -396,17 +397,10 @@ def gpu_args(self, args, runner=False):

        return gpu_args

    def get_cmd_with_wrapper(self, cmd_args):
        for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
            if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
                return f"{dir}libexec/ramalama/{cmd_args[0]}"

        return ""

    def exec_model_in_container(self, model_path, cmd_args, args):
        if not args.container:
            if USE_RAMALAMA_WRAPPER:
                cmd_args[0] = self.get_cmd_with_wrapper(cmd_args)
                cmd_args[0] = get_cmd_with_wrapper(cmd_args)

            return False

Expand Down