diff --git a/.gitignore b/.gitignore
index 331661364..73e1d9b69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ venv/
 *.container
 *.image
 *.volume
-__pycache__/
\ No newline at end of file
+__pycache__/
+.aider*
diff --git a/docs/ramalama-client.1.md b/docs/ramalama-client.1.md
new file mode 100644
index 000000000..c78fa0707
--- /dev/null
+++ b/docs/ramalama-client.1.md
@@ -0,0 +1,28 @@
+% ramalama-client 1
+
+## NAME
+ramalama\-client - interact with the AI Model server (experimental)
+
+## SYNOPSIS
+**ramalama client** [*options*] _host_
+
+## OPTIONS
+
+#### **--help**, **-h**
+show this help message and exit
+
+## DESCRIPTION
+Interact with an AI Model server. The client can send queries to the AI Model server and retrieve responses.
+
+## EXAMPLES
+
+### Connect to the AI Model server.
+```
+$ ramalama client http://127.0.0.1:8080
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**, **[ramalama-serve(1)](ramalama-serve.1.md)**
+
+## HISTORY
+Apr 2025, Originally compiled by Eric Curtin
diff --git a/docs/ramalama.1.md b/docs/ramalama.1.md
index ea218fe02..75ad533a7 100644
--- a/docs/ramalama.1.md
+++ b/docs/ramalama.1.md
@@ -175,6 +175,7 @@ It adds support for model versioning and multiple files such as chat templates.
 | Command | Description |
 | ------------------------------------------------- | ---------------------------------------------------------- |
 | [ramalama-bench(1)](ramalama-bench.1.md) | benchmark specified AI Model |
+| [ramalama-client(1)](ramalama-client.1.md) | interact with the AI Model server (experimental) |
 | [ramalama-containers(1)](ramalama-containers.1.md)| list all RamaLama containers |
 | [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image |
 | [ramalama-info(1)](ramalama-info.1.md) | display RamaLama configuration information |
diff --git a/ramalama/cli.py b/ramalama/cli.py
index dc4b3bec8..4fc1d64b3 100644
--- a/ramalama/cli.py
+++ b/ramalama/cli.py
@@ -9,7 +9,7 @@
 import ramalama.oci
 import ramalama.rag
 
-from ramalama.common import accel_image, get_accel, perror, run_cmd
+from ramalama.common import accel_image, exec_cmd, get_accel, get_cmd_with_wrapper, perror, run_cmd
 from ramalama.config import CONFIG
 from ramalama.model import MODEL_TYPES
 from ramalama.model_factory import ModelFactory
@@ -162,6 +162,7 @@ def configure_subcommands(parser):
     subparsers = parser.add_subparsers(dest="subcommand")
     subparsers.required = False
     bench_parser(subparsers)
+    client_parser(subparsers)
     containers_parser(subparsers)
     convert_parser(subparsers)
     help_parser(subparsers)
@@ -924,6 +925,16 @@ def version_parser(subparsers):
     parser.set_defaults(func=print_version)
 
 
+def client_parser(subparsers):
+    """Add parser for client command"""
+    parser = subparsers.add_parser("client", help="interact with an OpenAI endpoint")
+    parser.add_argument("HOST", help="host to connect to")  # positional argument
+    parser.add_argument(
+        "ARGS", nargs="*", help="overrides the default prompt, and the output is returned without entering the chatbot"
+    )
+    parser.set_defaults(func=client_cli)
+
+
 def rag_parser(subparsers):
     parser = subparsers.add_parser(
         "rag",
@@ -997,6 +1008,13 @@ def New(model, args, transport=CONFIG["transport"]):
     return ModelFactory(model, args, transport=transport).create()
 
 
+def client_cli(args):
+    """Handle client command execution"""
+    client_args = ["ramalama-client-core", "-c", "2048", "--temp", "0.8", args.HOST] + args.ARGS
+    client_args[0] = get_cmd_with_wrapper(client_args)
+    exec_cmd(client_args)
+
+
 def perplexity_parser(subparsers):
     parser = subparsers.add_parser("perplexity", help="calculate perplexity for specified AI Model")
     run_serve_perplexity_args(parser)
diff --git a/ramalama/common.py b/ramalama/common.py
index feebdb361..623b6644d 100644
--- a/ramalama/common.py
+++ b/ramalama/common.py
@@ -496,6 +496,14 @@ def tagged_image(image):
     return f"{image}:{minor_release()}"
 
 
+def get_cmd_with_wrapper(cmd_args):
+    for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
+        if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
+            return f"{dir}libexec/ramalama/{cmd_args[0]}"
+
+    return ""
+
+
 def accel_image(config, args):
     if args and len(args.image.split(":")) > 1:
         return args.image
diff --git a/ramalama/model.py b/ramalama/model.py
index 67e02c13c..6e8ba1f3a 100644
--- a/ramalama/model.py
+++ b/ramalama/model.py
@@ -15,6 +15,7 @@
     exec_cmd,
     genname,
     get_accel_env_vars,
+    get_cmd_with_wrapper,
     run_cmd,
     set_accel_env_vars,
 )
@@ -396,17 +397,10 @@ def gpu_args(self, args, runner=False):
 
         return gpu_args
 
-    def get_cmd_with_wrapper(self, cmd_args):
-        for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
-            if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
-                return f"{dir}libexec/ramalama/{cmd_args[0]}"
-
-        return ""
-
     def exec_model_in_container(self, model_path, cmd_args, args):
         if not args.container:
            if USE_RAMALAMA_WRAPPER:
-                cmd_args[0] = self.get_cmd_with_wrapper(cmd_args)
+                cmd_args[0] = get_cmd_with_wrapper(cmd_args)
 
             return False
 
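A note on the relocation, separate from the diff itself: moving `get_cmd_with_wrapper` out of the `Model` class in `ramalama/model.py` and into `ramalama/common.py` lets both the new `client_cli` path and `Model.exec_model_in_container` share one lookup, and it also makes the helper testable in isolation. The sketch below is a hypothetical pytest example, not part of this change; the test file name and the use of the `monkeypatch` fixture are assumptions, and the binary name simply mirrors the `ramalama-client-core` argv that `client_cli` builds.

```python
# tests/test_common_wrapper.py (hypothetical) -- sketch only, not included in the diff.
from ramalama.common import get_cmd_with_wrapper


def test_returns_first_existing_prefix(monkeypatch):
    wanted = "/opt/homebrew/libexec/ramalama/ramalama-client-core"
    # Pretend only the Homebrew prefix contains the wrapped binary.
    monkeypatch.setattr("os.path.exists", lambda path: path == wanted)
    assert get_cmd_with_wrapper(["ramalama-client-core"]) == wanted


def test_returns_empty_string_when_not_installed(monkeypatch):
    # No prefix contains the binary, so the helper falls back to "".
    monkeypatch.setattr("os.path.exists", lambda path: False)
    assert get_cmd_with_wrapper(["ramalama-client-core"]) == ""
```

The helper probes the empty, `/opt/homebrew/`, `/usr/local/`, and `/usr/` prefixes in order for `libexec/ramalama/<cmd>`, so the first test exercises the "first match wins" behavior and the second the empty-string fallback that `exec_model_in_container` relies on.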