diff --git a/README.md b/README.md index c223cda2..240e39d7 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,36 @@ This directory contains the necessary files to build a Red Hat compatible contai - `llama` CLI tool installed: `pip install llama-stack` - Podman or Docker installed +## Build Modes + +The build script supports three modes: + +### 1. Full Mode (Default) +Includes all features including TrustyAI providers that require Kubernetes/OpenShift: +```bash +./distribution/build.py +``` + +### 2. Standalone Mode +Builds a version without Kubernetes dependencies, using Llama Guard for safety: +```bash +./distribution/build.py --standalone +``` + +### 3. Unified Mode (Recommended) +Builds a single container that supports both modes via environment variables: +```bash +./distribution/build.py --unified +``` + ## Generating the Containerfile The Containerfile is auto-generated from a template. To generate it: 1. Make sure you have the `llama` CLI tool installed -2. Run the build script from root of this git repo: +2. Run the build script from root of this git repo with your desired mode: ```bash - ./distribution/build.py + ./distribution/build.py [--standalone] [--unified] ``` This will: @@ -35,7 +57,47 @@ Once the Containerfile is generated, you can build the image using either Podman ### Using Podman build image for x86_64 ```bash -podman build --platform linux/amd64 -f distribution/Containerfile -t rh . +podman build --platform linux/amd64 -f distribution/Containerfile -t llama-stack-rh . +``` + +### Using Docker + +```bash +docker build -f distribution/Containerfile -t llama-stack-rh . +``` + +## Running the Container + +### Running in Standalone Mode (No Kubernetes) + +To run the container in standalone mode without Kubernetes dependencies, set the `STANDALONE` environment variable: + +```bash +# Using Docker +docker run -e STANDALONE=true \ + -e VLLM_URL=http://host.docker.internal:8000/v1 \ + -e INFERENCE_MODEL=your-model-name \ + -p 8321:8321 \ + llama-stack-rh + +# Using Podman +podman run -e STANDALONE=true \ + -e VLLM_URL=http://host.docker.internal:8000/v1 \ + -e INFERENCE_MODEL=your-model-name \ + -p 8321:8321 \ + llama-stack-rh +``` + +### Running in Full Mode (With Kubernetes) + +To run with all features including TrustyAI providers (requires Kubernetes/OpenShift): + +```bash +# Using Docker +docker run -p 8321:8321 llama-stack-rh + +# Using Podman +podman run -p 8321:8321 llama-stack-rh ``` ## Notes diff --git a/distribution/Containerfile b/distribution/Containerfile index 3aa47fbc..21a41f6a 100644 --- a/distribution/Containerfile +++ b/distribution/Containerfile @@ -1,5 +1,5 @@ # WARNING: This file is auto-generated. Do not modify it manually. 
-# Generated by: distribution/build.py +# Generated by: distribution/build.py --unified FROM registry.access.redhat.com/ubi9/python-312@sha256:95ec8d3ee9f875da011639213fd254256c29bc58861ac0b11f290a291fa04435 WORKDIR /opt/app-root @@ -8,6 +8,7 @@ RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient RUN pip install \ aiosqlite \ autoevals \ + blobfile \ chardet \ datasets \ fastapi \ @@ -42,7 +43,15 @@ RUN pip install --index-url https://download.pytorch.org/whl/cpu torch torchvisi RUN pip install --no-deps sentence-transformers RUN pip install --no-cache llama-stack==0.2.18 RUN mkdir -p ${HOME}/.llama/providers.d ${HOME}/.cache -COPY distribution/run.yaml ${APP_ROOT}/run.yaml + +# Copy both configurations +COPY distribution/run.yaml ${APP_ROOT}/run-full.yaml +COPY distribution/run-standalone.yaml ${APP_ROOT}/run-standalone.yaml + +# Copy the entrypoint script +COPY --chmod=755 distribution/entrypoint.sh ${APP_ROOT}/entrypoint.sh + +# Copy providers directory (will be filtered by entrypoint script) COPY distribution/providers.d/ ${HOME}/.llama/providers.d/ -ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "/opt/app-root/run.yaml"] +ENTRYPOINT ["/opt/app-root/entrypoint.sh"] \ No newline at end of file diff --git a/distribution/Containerfile.in b/distribution/Containerfile.in index 139dbf25..7c19ebcf 100644 --- a/distribution/Containerfile.in +++ b/distribution/Containerfile.in @@ -5,7 +5,15 @@ RUN pip install sqlalchemy # somehow sqlalchemy[asyncio] is not sufficient {dependencies} RUN pip install --no-cache llama-stack==0.2.18 RUN mkdir -p ${{HOME}}/.llama/providers.d ${{HOME}}/.cache -COPY distribution/run.yaml ${{APP_ROOT}}/run.yaml + +# Copy both configurations +COPY distribution/run.yaml ${{APP_ROOT}}/run-full.yaml +COPY distribution/run-standalone.yaml ${{APP_ROOT}}/run-standalone.yaml + +# Copy the entrypoint script +COPY --chmod=755 distribution/entrypoint.sh ${{APP_ROOT}}/entrypoint.sh + +# Copy providers directory (will be filtered by entrypoint script) COPY distribution/providers.d/ ${{HOME}}/.llama/providers.d/ -ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "/opt/app-root/run.yaml"] +ENTRYPOINT ["/opt/app-root/entrypoint.sh"] diff --git a/distribution/build-standalone.yaml b/distribution/build-standalone.yaml new file mode 100644 index 00000000..85d9fbe0 --- /dev/null +++ b/distribution/build-standalone.yaml @@ -0,0 +1,35 @@ +version: "2" +distribution_spec: + description: Red Hat distribution of Llama Stack (Standalone Docker) + providers: + inference: + - "remote::vllm" + - "inline::sentence-transformers" + vector_io: + - "inline::milvus" + safety: + - "inline::llama-guard" + agents: + - "inline::meta-reference" + # eval: removed trustyai_lmeval provider for standalone Docker + datasetio: + - "remote::huggingface" + - "inline::localfs" + scoring: + - "inline::basic" + - "inline::llm-as-judge" + - "inline::braintrust" + telemetry: + - "inline::meta-reference" + tool_runtime: + - "remote::brave-search" + - "remote::tavily-search" + - "inline::rag-runtime" + - "remote::model-context-protocol" + container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027 +additional_pip_packages: +- aiosqlite +- sqlalchemy[asyncio] +image_type: container +image_name: llama-stack-rh-standalone +# external_providers_dir: distribution/providers.d # Disabled for standalone mode diff --git a/distribution/build.py b/distribution/build.py index 362fd6f3..95dde2d5 100755 --- a/distribution/build.py +++ b/distribution/build.py @@ -5,11 +5,14 @@ # 
This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -# Usage: ./distribution/build.py +# Usage: ./distribution/build.py [--standalone] [--unified] +# Or set STANDALONE=true or UNIFIED=true environment variables +import os import shutil import subprocess import sys +import argparse from pathlib import Path BASE_REQUIREMENTS = [ @@ -57,9 +60,10 @@ def check_llama_stack_version(): print("Continuing without version validation...") -def get_dependencies(): +def get_dependencies(standalone=False): """Execute the llama stack build command and capture dependencies.""" - cmd = "llama stack build --config distribution/build.yaml --print-deps-only" + config_file = "distribution/build-standalone.yaml" if standalone else "distribution/build.yaml" + cmd = f"llama stack build --config {config_file} --print-deps-only" try: result = subprocess.run( cmd, shell=True, capture_output=True, text=True, check=True @@ -112,7 +116,7 @@ def get_dependencies(): sys.exit(1) -def generate_containerfile(dependencies): +def generate_containerfile(dependencies, standalone=False, unified=False): """Generate Containerfile from template with dependencies.""" template_path = Path("distribution/Containerfile.in") output_path = Path("distribution/Containerfile") @@ -126,7 +130,13 @@ def generate_containerfile(dependencies): template_content = f.read() # Add warning message at the top - warning = "# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: distribution/build.py\n\n" + if unified: + mode = "unified" + elif standalone: + mode = "standalone" + else: + mode = "full" + warning = f"# WARNING: This file is auto-generated. Do not modify it manually.\n# Generated by: distribution/build.py --{mode}\n\n" # Process template using string formatting containerfile_content = warning + template_content.format( @@ -141,6 +151,35 @@ def generate_containerfile(dependencies): def main(): + parser = argparse.ArgumentParser( + description="Build Llama Stack distribution", + epilog=""" +Examples: + %(prog)s # Build full version (default) + %(prog)s --standalone # Build standalone version (no Kubernetes deps) + %(prog)s --unified # Build unified version (supports both modes) + STANDALONE=true %(prog)s # Build standalone via environment variable + UNIFIED=true %(prog)s # Build unified via environment variable + """, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument("--standalone", action="store_true", + help="Build standalone version without Kubernetes dependencies") + parser.add_argument("--unified", action="store_true", + help="Build unified version that supports both modes via environment variables") + args = parser.parse_args() + + # Check environment variable as fallback + standalone = args.standalone or os.getenv("STANDALONE", "false").lower() in ("true", "1", "yes") + unified = args.unified or os.getenv("UNIFIED", "false").lower() in ("true", "1", "yes") + + if unified: + mode = "unified" + print("Building unified version (supports both full and standalone modes)...") + else: + mode = "standalone" if standalone else "full" + print(f"Building {mode} version...") + print("Checking llama installation...") check_llama_installed() @@ -148,12 +187,27 @@ def main(): check_llama_stack_version() print("Getting dependencies...") - dependencies = get_dependencies() + dependencies = get_dependencies(standalone) print("Generating Containerfile...") - generate_containerfile(dependencies) + 
generate_containerfile(dependencies, standalone, unified) print("Done!") + print(f"\nTo build the Docker image:") + if unified: + print(" docker build -f distribution/Containerfile -t llama-stack-unified .") + print("\nTo run in standalone mode:") + print(" docker run -e STANDALONE=true -e VLLM_URL=http://host.docker.internal:8000/v1 -e INFERENCE_MODEL=your-model -p 8321:8321 llama-stack-unified") + print("\nTo run in full mode (requires Kubernetes):") + print(" docker run -p 8321:8321 llama-stack-unified") + elif standalone: + print(" docker build -f distribution/Containerfile -t llama-stack-standalone .") + print("\nTo run in standalone mode:") + print(" docker run -e VLLM_URL=http://host.docker.internal:8000/v1 -e INFERENCE_MODEL=your-model -p 8321:8321 llama-stack-standalone") + else: + print(" docker build -f distribution/Containerfile -t llama-stack-full .") + print("\nTo run with full features (requires Kubernetes):") + print(" docker run -p 8321:8321 llama-stack-full") if __name__ == "__main__": diff --git a/distribution/build.yaml b/distribution/build.yaml index fc465882..6e264202 100644 --- a/distribution/build.yaml +++ b/distribution/build.yaml @@ -1,32 +1,32 @@ -version: 2 +version: "2" distribution_spec: description: Red Hat distribution of Llama Stack providers: inference: - - provider_type: remote::vllm - - provider_type: inline::sentence-transformers + - "remote::vllm" + - "inline::sentence-transformers" vector_io: - - provider_type: inline::milvus + - "inline::milvus" safety: - - provider_type: remote::trustyai_fms + - "remote::trustyai_fms" agents: - - provider_type: inline::meta-reference + - "inline::meta-reference" eval: - - provider_type: remote::trustyai_lmeval + - "remote::trustyai_lmeval" datasetio: - - provider_type: remote::huggingface - - provider_type: inline::localfs + - "remote::huggingface" + - "inline::localfs" scoring: - - provider_type: inline::basic - - provider_type: inline::llm-as-judge - - provider_type: inline::braintrust + - "inline::basic" + - "inline::llm-as-judge" + - "inline::braintrust" telemetry: - - provider_type: inline::meta-reference + - "inline::meta-reference" tool_runtime: - - provider_type: remote::brave-search - - provider_type: remote::tavily-search - - provider_type: inline::rag-runtime - - provider_type: remote::model-context-protocol + - "remote::brave-search" + - "remote::tavily-search" + - "inline::rag-runtime" + - "remote::model-context-protocol" container_image: registry.redhat.io/ubi9/python-311:9.6-1749631027 additional_pip_packages: - aiosqlite diff --git a/distribution/entrypoint.sh b/distribution/entrypoint.sh new file mode 100755 index 00000000..a3d1d09e --- /dev/null +++ b/distribution/entrypoint.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# Unified entrypoint script for Llama Stack distribution +# Supports both full and standalone modes via STANDALONE environment variable + +set -e + +echo "=== Llama Stack Distribution Entrypoint ===" + +# Check if we should run in standalone mode +if [ "${STANDALONE:-false}" = "true" ]; then + echo "Running in STANDALONE mode (no Kubernetes dependencies)" + + # Use standalone configuration + CONFIG_FILE="/opt/app-root/run-standalone.yaml" + + # Filter out TrustyAI providers from providers.d directory + echo "Filtering out TrustyAI providers for standalone mode..." + mkdir -p ${HOME}/.llama/providers.d + + # Copy only non-TrustyAI providers + find /opt/app-root/.llama/providers.d -name "*.yaml" ! 
-name "*trustyai*" -exec cp {} ${HOME}/.llama/providers.d/ \; 2>/dev/null || true + + # Remove the external_providers_dir from the config to prevent loading TrustyAI providers + echo "Disabling external providers directory for standalone mode..." + sed -i 's|external_providers_dir:.*|# external_providers_dir: disabled for standalone mode|' "$CONFIG_FILE" + + echo "✓ Standalone configuration ready" + echo "✓ TrustyAI providers excluded" +else + echo "Running in FULL mode (with Kubernetes dependencies)" + + # Use full configuration + CONFIG_FILE="/opt/app-root/run-full.yaml" + + # Copy all providers + echo "Setting up all providers..." + mkdir -p ${HOME}/.llama/providers.d + cp -r /opt/app-root/.llama/providers.d/* ${HOME}/.llama/providers.d/ 2>/dev/null || true + + echo "✓ Full configuration ready" + echo "✓ All providers available" +fi + +echo "Configuration file: $CONFIG_FILE" +echo "APIs enabled: $(grep -A 20 '^apis:' $CONFIG_FILE | grep '^-' | wc -l) APIs" + +# Show which APIs are available +echo "Available APIs:" +grep -A 20 '^apis:' $CONFIG_FILE | grep '^-' | sed 's/^- / - /' || echo " (none listed)" + +# Start the server +echo "Starting Llama Stack server..." +exec python -m llama_stack.core.server.server "$CONFIG_FILE" diff --git a/distribution/run-standalone.yaml b/distribution/run-standalone.yaml new file mode 100644 index 00000000..8d2c1531 --- /dev/null +++ b/distribution/run-standalone.yaml @@ -0,0 +1,130 @@ +version: 2 +image_name: rh-standalone +apis: +- agents +- datasetio +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: milvus + provider_type: inline::milvus + config: + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/milvus.db + kvstore: + type: sqlite + namespace: null + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/milvus_registry.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/agents_store.db + responses_store: + type: sqlite + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/responses_store.db + # eval: removed trustyai_lmeval provider for standalone Docker + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + namespace: null + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + namespace: null + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + 
service_name: "${env.OTEL_SERVICE_NAME:=llama-stack-standalone}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: /opt/app-root/src/.llama/distributions/rh-standalone/trace_store.db + otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/registry.db +inference_store: + type: sqlite + db_path: /opt/app-root/src/.llama/distributions/rh-standalone/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm +- metadata: + embedding_dimension: 768 + model_id: granite-embedding-125m + provider_id: sentence-transformers + provider_model_id: ibm-granite/granite-embedding-125m-english + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 +external_providers_dir: /opt/app-root/src/.llama/providers.d
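
A minimal end-to-end smoke test for the unified image, sketched under a few assumptions: the build and run commands mirror the README additions and the hints printed by `build.py` above, the `docker logs` grep matches the banner emitted by `distribution/entrypoint.sh`, and the final `curl` assumes upstream llama-stack exposes a `/v1/health` route on port 8321 (not part of this patch; verify against your llama-stack version).

```bash
# Generate the unified Containerfile and build the image (tag taken from the build.py hint).
./distribution/build.py --unified
docker build -f distribution/Containerfile -t llama-stack-unified .

# Run in standalone mode; VLLM_URL/INFERENCE_MODEL must point at a reachable vLLM server.
docker run -d --name llama-stack-smoke \
  -e STANDALONE=true \
  -e VLLM_URL=http://host.docker.internal:8000/v1 \
  -e INFERENCE_MODEL=your-model-name \
  -p 8321:8321 \
  llama-stack-unified

# The entrypoint banner confirms which mode and config file were selected.
docker logs llama-stack-smoke | grep -E "STANDALONE mode|Configuration file"

# Assumption: the stock llama-stack server serves a health route under /v1; adjust if yours differs.
curl -sf http://localhost:8321/v1/health && echo "server is up"

docker rm -f llama-stack-smoke
```

The same flow works with `podman` by swapping the binary name, as in the README examples above.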