diff --git a/Containerfile b/Containerfile
index 07bd8adc..c47cb4fe 100644
--- a/Containerfile
+++ b/Containerfile
@@ -62,4 +62,4 @@ ENTRYPOINT ["python3.12", "src/lightspeed_stack.py"]
 LABEL vendor="Red Hat, Inc."
 
 # no-root user is checked in Konflux
-USER 1001
+USER 1001
\ No newline at end of file
diff --git a/README.md b/README.md
index 44bacdfc..147200c1 100644
--- a/README.md
+++ b/README.md
@@ -506,6 +506,80 @@ Container images are built for the following platforms:
 
 1. `linux/amd64` - main platform for deployment
 1. `linux/arm64`- Mac users with M1/M2/M3 CPUs
+## Building Container Images
+
+The repository includes production-ready container configurations that support two deployment modes:
+
+1. **Server Mode**: lightspeed-core connects to llama-stack running as a separate service
+2. **Library Mode**: llama-stack runs as a library within lightspeed-core
+
+### Llama-Stack as Separate Service (Server Mode)
+
+When running llama-stack as a separate service, the existing `docker-compose.yaml` provides the complete setup: it builds two containers, one for lightspeed-core and one for llama-stack.
+
+**Configuration** (`lightspeed-stack.yaml`):
+```yaml
+llama_stack:
+  use_as_library_client: false
+  url: http://llama-stack:8321 # container name from docker-compose.yaml
+  api_key: xyzzy
+```
+
+From the root of this project, run:
+
+```bash
+# Set your OpenAI API key
+export OPENAI_API_KEY="your-api-key-here"
+
+# Start both services
+podman compose up --build
+
+# Access lightspeed-core at http://localhost:8080
+# Access llama-stack at http://localhost:8321
+```
+
+### Llama-Stack as Library (Library Mode)
+
+When embedding llama-stack directly in the lightspeed-core container, use the existing `Containerfile` (no separate llama-stack container is built). First, modify the `lightspeed-stack.yaml` configuration to use llama-stack in library mode.
+
+**Configuration** (`lightspeed-stack.yaml`):
+```yaml
+llama_stack:
+  use_as_library_client: true
+  library_client_config_path: /app-root/run.yaml
+```
+
+**Build and run**:
+```bash
+# Build lightspeed-core with embedded llama-stack
+podman build -f Containerfile -t my-lightspeed-core:latest .
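+# Optional: on Apple Silicon hosts, if the base images are not published for
+# arm64 (an assumption; check your registry), you can force an amd64 build:
+# podman build --platform linux/amd64 -f Containerfile -t my-lightspeed-core:latest .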
+
+# Run with embedded llama-stack
+podman run \
+  -p 8080:8080 \
+  -v ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z \
+  -v ./run.yaml:/app-root/run.yaml:Z \
+  -e OPENAI_API_KEY=your-api-key \
+  my-lightspeed-core:latest
+```
+
+For macOS users, mount the configuration files read-only (`:ro`) instead of using the SELinux relabeling option (`:Z`):
+```bash
+podman run \
+  -p 8080:8080 \
+  -v ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:ro \
+  -v ./run.yaml:/app-root/run.yaml:ro \
+  -e OPENAI_API_KEY=your-api-key \
+  my-lightspeed-core:latest
+```
+
+### Verify it's running properly
+
+A simple sanity check:
+
+```bash
+curl -H "Accept: application/json" http://localhost:8080/v1/models
+```
 
 # Endpoints
 
diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml
index d7d3f571..9ac7f63a 100644
--- a/lightspeed-stack.yaml
+++ b/lightspeed-stack.yaml
@@ -1,6 +1,6 @@
 name: Lightspeed Core Service (LCS)
 service:
-  host: localhost
+  host: 0.0.0.0
   port: 8080
   auth_enabled: false
   workers: 1
@@ -13,7 +13,7 @@ llama_stack:
   # Alternative for "as library use"
   # use_as_library_client: true
   # library_client_config_path:
-  url: http://localhost:8321
+  url: http://llama-stack:8321
   api_key: xyzzy
 user_data_collection:
   feedback_enabled: true
diff --git a/run.yaml b/run.yaml
new file mode 100644
index 00000000..1196d854
--- /dev/null
+++ b/run.yaml
@@ -0,0 +1,125 @@
+version: '2'
+image_name: minimal-viable-llama-stack-configuration
+
+apis:
+  - agents
+  - datasetio
+  - eval
+  - inference
+  - post_training
+  - safety
+  - scoring
+  - telemetry
+  - tool_runtime
+  - vector_io
+benchmarks: []
+container_image: null
+datasets: []
+external_providers_dir: null
+inference_store:
+  db_path: .llama/distributions/ollama/inference_store.db
+  type: sqlite
+logging: null
+metadata_store:
+  db_path: .llama/distributions/ollama/registry.db
+  namespace: null
+  type: sqlite
+providers:
+  agents:
+    - config:
+        persistence_store:
+          db_path: .llama/distributions/ollama/agents_store.db
+          namespace: null
+          type: sqlite
+        responses_store:
+          db_path: .llama/distributions/ollama/responses_store.db
+          type: sqlite
+      provider_id: meta-reference
+      provider_type: inline::meta-reference
+  datasetio:
+    - config:
+        kvstore:
+          db_path: .llama/distributions/ollama/huggingface_datasetio.db
+          namespace: null
+          type: sqlite
+      provider_id: huggingface
+      provider_type: remote::huggingface
+    - config:
+        kvstore:
+          db_path: .llama/distributions/ollama/localfs_datasetio.db
+          namespace: null
+          type: sqlite
+      provider_id: localfs
+      provider_type: inline::localfs
+  eval:
+    - config:
+        kvstore:
+          db_path: .llama/distributions/ollama/meta_reference_eval.db
+          namespace: null
+          type: sqlite
+      provider_id: meta-reference
+      provider_type: inline::meta-reference
+  inference:
+    - provider_id: openai
+      provider_type: remote::openai
+      config:
+        api_key: ${env.OPENAI_API_KEY}
+  post_training:
+    - config:
+        checkpoint_format: huggingface
+        device: cpu
+        distributed_backend: null
+      provider_id: huggingface
+      provider_type: inline::huggingface
+  safety:
+    - config:
+        excluded_categories: []
+      provider_id: llama-guard
+      provider_type: inline::llama-guard
+  scoring:
+    - config: {}
+      provider_id: basic
+      provider_type: inline::basic
+    - config: {}
+      provider_id: llm-as-judge
+      provider_type: inline::llm-as-judge
+    - config:
+        openai_api_key: '********'
+      provider_id: braintrust
+      provider_type: inline::braintrust
+  telemetry:
+    - config:
+        service_name: 'lightspeed-stack-telemetry'
+        sinks: sqlite
+        sqlite_db_path: .llama/distributions/ollama/trace_store.db
+      provider_id: meta-reference
+      provider_type: inline::meta-reference
+  tool_runtime:
+    - provider_id: model-context-protocol
+      provider_type: remote::model-context-protocol
+      config: {}
+  vector_io:
+    - config:
+        kvstore:
+          db_path: .llama/distributions/ollama/faiss_store.db
+          namespace: null
+          type: sqlite
+      provider_id: faiss
+      provider_type: inline::faiss
+scoring_fns: []
+server:
+  auth: null
+  host: null
+  port: 8321
+  quota: null
+  tls_cafile: null
+  tls_certfile: null
+  tls_keyfile: null
+shields: []
+vector_dbs: []
+
+models:
+  - model_id: gpt-4-turbo
+    provider_id: openai
+    model_type: llm
+    provider_model_id: gpt-4-turbo
diff --git a/test.containerfile b/test.containerfile
index f030f2da..1ee04326 100644
--- a/test.containerfile
+++ b/test.containerfile
@@ -14,7 +14,7 @@ COPY README.md ./
 COPY src/ ./src/
 
 RUN microdnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
-    python3.12 python3.12-devel python3.12-pip git tar
+    python3.12 python3.12-devel python3.12-pip git tar gcc gcc-c++ make
 
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
 
@@ -25,4 +25,4 @@ RUN uv -h
 
 # Include dev deps for testing (pytest, behave, etc.)
 RUN uv sync --locked --no-install-project --group dev --group llslibdev
-CMD ["uv", "run", "llama", "stack", "run", "run.yaml"]
+CMD ["uv", "run", "llama", "stack", "run", "run.yaml"]
\ No newline at end of file