3 changes: 2 additions & 1 deletion .gitignore
@@ -13,4 +13,5 @@ venv/
*.container
*.image
*.volume
__pycache__/
__pycache__/
.aider*
Member Author

I actually used this aider tool to rewrite a lot of the boilerplate associated with introducing a new command.

28 changes: 28 additions & 0 deletions docs/ramalama-client.1.md
@@ -0,0 +1,28 @@
% ramalama-client 1

## NAME
ramalama\-client - interact with the AI Model server (experimental)

## SYNOPSIS
**ramalama client** [*options*] _host_ [_args_...]

## OPTIONS

#### **--help**, **-h**
show this help message and exit

## DESCRIPTION
Interact with an AI Model server. The client sends queries to the AI Model server and retrieves responses. Any trailing _args_ override the default prompt, and the output is returned without entering the interactive chatbot.

## EXAMPLES

### Connect to the AI Model server.
```
$ ramalama client http://127.0.0.1:8080
```
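
### Send a one-shot query.
Trailing arguments override the default prompt and the output is returned without entering the chatbot; the prompt below is only an example.
```
$ ramalama client http://127.0.0.1:8080 "Write a haiku about llamas"
```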

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**, **[ramalama-serve(1)](ramalama-serve.1.md)**

## HISTORY
Apr 2025, Originally compiled by Eric Curtin <[email protected]>
1 change: 1 addition & 0 deletions docs/ramalama.1.md
@@ -175,6 +175,7 @@ It adds support for model versioning and multiple files such as chat templates.
| Command | Description |
| ------------------------------------------------- | ---------------------------------------------------------- |
| [ramalama-bench(1)](ramalama-bench.1.md) | benchmark specified AI Model |
| [ramalama-client(1)](ramalama-client.1.md) | interact with the AI Model server (experimental) |
| [ramalama-containers(1)](ramalama-containers.1.md)| list all RamaLama containers |
| [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image |
| [ramalama-info(1)](ramalama-info.1.md) | display RamaLama configuration information |
20 changes: 19 additions & 1 deletion ramalama/cli.py
@@ -9,7 +9,7 @@

import ramalama.oci
import ramalama.rag
from ramalama.common import accel_image, get_accel, perror, run_cmd
from ramalama.common import accel_image, exec_cmd, get_accel, get_cmd_with_wrapper, perror, run_cmd
from ramalama.config import CONFIG
from ramalama.model import MODEL_TYPES
from ramalama.model_factory import ModelFactory
@@ -162,6 +162,7 @@ def configure_subcommands(parser):
    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = False
    bench_parser(subparsers)
    client_parser(subparsers)
    containers_parser(subparsers)
    convert_parser(subparsers)
    help_parser(subparsers)
@@ -924,6 +925,16 @@ def version_parser(subparsers):
    parser.set_defaults(func=print_version)


def client_parser(subparsers):
    """Add parser for client command"""
    parser = subparsers.add_parser("client", help="interact with an OpenAI endpoint")
    parser.add_argument("HOST", help="host to connect to")  # positional argument
    parser.add_argument(
        "ARGS", nargs="*", help="overrides the default prompt, and the output is returned without entering the chatbot"
    )
    parser.set_defaults(func=client_cli)

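If it helps to see the parsing behavior, here is a standalone sketch of just this subparser (the prompt text is hypothetical; the real parser is registered via `configure_subcommands` above):

```python
import argparse

# Standalone reconstruction of the client subparser, for illustration only.
parser = argparse.ArgumentParser(prog="ramalama")
subparsers = parser.add_subparsers(dest="subcommand")
client = subparsers.add_parser("client", help="interact with an OpenAI endpoint")
client.add_argument("HOST", help="host to connect to")
client.add_argument("ARGS", nargs="*", help="overrides the default prompt")

args = parser.parse_args(["client", "http://127.0.0.1:8080", "Hello"])
print(args.HOST, args.ARGS)  # -> http://127.0.0.1:8080 ['Hello']
```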

def rag_parser(subparsers):
    parser = subparsers.add_parser(
        "rag",
@@ -997,6 +1008,13 @@ def New(model, args, transport=CONFIG["transport"]):
    return ModelFactory(model, args, transport=transport).create()


def client_cli(args):
    """Handle client command execution"""
    client_args = ["ramalama-client-core", "-c", "2048", "--temp", "0.8", args.HOST] + args.ARGS
    client_args[0] = get_cmd_with_wrapper(client_args)
    exec_cmd(client_args)

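For a concrete picture of the dispatch, a minimal sketch of what `client_cli` builds and execs, assuming a wrapper is installed; the host and prompt are examples, and `-c`/`--temp` are the fixed defaults shown above (presumably context size and sampling temperature for `ramalama-client-core`):

```python
from ramalama.common import exec_cmd, get_cmd_with_wrapper

# Roughly equivalent to `ramalama client http://127.0.0.1:8080 "Write a haiku"`.
client_args = ["ramalama-client-core", "-c", "2048", "--temp", "0.8",
               "http://127.0.0.1:8080", "Write a haiku"]
# Resolve argv[0] to an absolute wrapper path,
# e.g. "/usr/libexec/ramalama/ramalama-client-core" on a package install.
client_args[0] = get_cmd_with_wrapper(client_args)
exec_cmd(client_args)  # replaces the current process with the client
```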

def perplexity_parser(subparsers):
    parser = subparsers.add_parser("perplexity", help="calculate perplexity for specified AI Model")
    run_serve_perplexity_args(parser)
8 changes: 8 additions & 0 deletions ramalama/common.py
@@ -496,6 +496,14 @@ def tagged_image(image):
    return f"{image}:{minor_release()}"


def get_cmd_with_wrapper(cmd_args):
    for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
        if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
            return f"{dir}libexec/ramalama/{cmd_args[0]}"

    return ""

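A quick way to see the search order this implements (the helper name is an example; the empty prefix makes the first candidate relative to the current directory, which matters when running from a source tree):

```python
import os

# Probe the same prefixes get_cmd_with_wrapper tries, in order; the first
# hit wins, and "" is returned when no wrapper directory has the helper.
helper = "ramalama-client-core"  # example helper name
for prefix in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
    candidate = f"{prefix}libexec/ramalama/{helper}"
    print(candidate, "->", "found" if os.path.exists(candidate) else "missing")
```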

def accel_image(config, args):
    if args and len(args.image.split(":")) > 1:
        return args.image
10 changes: 2 additions & 8 deletions ramalama/model.py
@@ -15,6 +15,7 @@
    exec_cmd,
    genname,
    get_accel_env_vars,
    get_cmd_with_wrapper,
    run_cmd,
    set_accel_env_vars,
)
@@ -396,17 +397,10 @@ def gpu_args(self, args, runner=False):

        return gpu_args

    def get_cmd_with_wrapper(self, cmd_args):
        for dir in ["", "/opt/homebrew/", "/usr/local/", "/usr/"]:
            if os.path.exists(f"{dir}libexec/ramalama/{cmd_args[0]}"):
                return f"{dir}libexec/ramalama/{cmd_args[0]}"

        return ""

    def exec_model_in_container(self, model_path, cmd_args, args):
        if not args.container:
            if USE_RAMALAMA_WRAPPER:
                cmd_args[0] = self.get_cmd_with_wrapper(cmd_args)
                cmd_args[0] = get_cmd_with_wrapper(cmd_args)

            return False

Expand Down