Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions container/Dockerfile.epp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Dockerfile.epp - Custom Dockerfile for the GAIE EPP image. Intended to be used via the deploy/inference-gateway/build-epp-dynamo.sh script.

# Image used by the build stage (consumed by the first FROM); override with --build-arg BUILDER_IMAGE=...
ARG BUILDER_IMAGE=golang:1.24
# Image used by the final runtime stage (consumed by the second FROM); override with --build-arg BASE_IMAGE=...
ARG BASE_IMAGE=ubuntu:22.04

############################
# Builder
############################
FROM ${BUILDER_IMAGE} AS builder

# Build with cgo enabled. CC/CXX are set explicitly; this helps cgo when
# linking libstdc++.
ENV CGO_ENABLED=1 \
    CC=gcc \
    CXX=g++

# C/C++ toolchain for cgo, and libstdc++ for link-time.
# The package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        g++ \
        gcc \
        libc6-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /src

# deps first (cache): this layer is only rebuilt when go.mod/go.sum change
COPY go.mod go.sum ./
RUN go mod download

# source
COPY cmd/epp ./cmd/epp
COPY pkg/epp ./pkg/epp
COPY internal ./internal
COPY api ./api

# sanity (optional): list the headers and library directories of the
# dynamo_kv_scorer plugin. Best-effort only: a missing directory prints a
# message and does not fail the build at this step.
RUN ls -la pkg/epp/scheduling/plugins/dynamo_kv_scorer/include/ || echo "Headers not found"
RUN ls -la pkg/epp/scheduling/plugins/dynamo_kv_scorer/lib/ || echo "Library not found"

# Build metadata. Declared as late as possible: an ARG is implicitly passed to
# every RUN that follows it, so declaring these earlier would invalidate the
# cached `go mod download` layer whenever the commit SHA changes.
ARG COMMIT_SHA=unknown
ARG BUILD_REF

# build: stamp the commit SHA and build ref into the metrics package via -X
WORKDIR /src/cmd/epp
RUN go build \
    -ldflags="-X sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics.BuildRef=${BUILD_REF}" \
    -o /epp

############################
# Runtime
############################
FROM ${BASE_IMAGE} AS runtime

# Minimal runtime deps; include libstdc++ runtime for -lstdc++.
# The unprivileged account gets a fixed numeric UID/GID so that the USER
# instruction below can be numeric: Kubernetes `runAsNonRoot: true` can only
# verify a numeric image user and rejects containers whose image user is a name.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd -r -g 65532 nonroot \
    && useradd -r -u 65532 -g nonroot nonroot

WORKDIR /
COPY --from=builder /epp /epp

# 65532:65532 is the nonroot user/group created above.
USER 65532:65532
ENTRYPOINT ["/epp"]
2 changes: 1 addition & 1 deletion deploy/inference-gateway/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ The script will apply a custom patch to the code with your GAIE repo and build t
```bash
# Use your custom paths
export DYNAMO_DIR=/path/to/dynamo
export EPP_DIR=/path/to/gateway-api-inference-extension
export GAIE_DIR=/path/to/gateway-api-inference-extension

# Run the script
cd deploy/inference-gateway
Expand Down
39 changes: 23 additions & 16 deletions deploy/inference-gateway/build-epp-dynamo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,39 +23,40 @@ if [[ -z "${DYNAMO_DIR}" ]]; then
exit 1
fi

if [[ -z "${EPP_DIR}" ]]; then
echo "EPP_DIR environment variable must be set"
echo " Example: export EPP_DIR=/path/to/gateway-api-inference-extension-dynamo"
if [[ -z "${GAIE_DIR}" ]]; then
echo "GAIE_DIR environment variable must be set"
echo " Example: export GAIE_DIR=/path/to/gateway-api-inference-extension"
exit 1
fi
DYNAMO_LIB_DIR="${EPP_DIR}/pkg/epp/scheduling/plugins/dynamo_kv_scorer/lib"
DYNAMO_INCLUDE_DIR="${EPP_DIR}/pkg/epp/scheduling/plugins/dynamo_kv_scorer/include"
DYNAMO_LIB_DIR="${GAIE_DIR}/pkg/epp/scheduling/plugins/dynamo_kv_scorer/lib"
DYNAMO_INCLUDE_DIR="${GAIE_DIR}/pkg/epp/scheduling/plugins/dynamo_kv_scorer/include"

echo "🏗️ Building Dynamo KV Router C Library..."
echo "Building Dynamo KV Router C Library..."

# Step 1: Build the static library
echo "📦 Building static library..."
echo "Building static library..."
cd "${DYNAMO_DIR}"
cargo build --release -p libdynamo_llm

# Step 2: Generate header file (with fallback)
echo "📝 Generating C header..."
echo "Generating C header..."
HEADER_OUTPUT="${DYNAMO_DIR}/lib/bindings/c/include/nvidia/dynamo_llm/llm_engine.h"

if ! cbindgen --config lib/bindings/c/cbindgen.toml --crate libdynamo_llm --output "${HEADER_OUTPUT}"; then
echo "cbindgen failed, using fallback header..."
cp "${DYNAMO_DIR}/lib/bindings/c/src/fallback_header.h" "${HEADER_OUTPUT}"
fi

# Step 3: Ensure EPP directories exist
echo "Preparing EPP directories..."
# Step 3: Ensure directories exist
echo "Preparing directories..."
mkdir -p "${DYNAMO_LIB_DIR}"
mkdir -p "${DYNAMO_INCLUDE_DIR}"

# Step 4: Copy files to EPP
echo "Copying files to EPP..."
# Step 4: Copy files to GAIE project
echo "Copying files to the GAIE project..."
cp "${HEADER_OUTPUT}" "${DYNAMO_INCLUDE_DIR}/"
cp "${DYNAMO_DIR}/target/release/libdynamo_llm_capi.a" "${DYNAMO_LIB_DIR}/"
cp "${DYNAMO_DIR}/container/Dockerfile.epp" "${GAIE_DIR}/Dockerfile.dynamo"

# Verify files were copied
if [[ ! -f "${DYNAMO_INCLUDE_DIR}/llm_engine.h" ]]; then
Expand All @@ -68,13 +69,19 @@ if [[ ! -f "${DYNAMO_LIB_DIR}/libdynamo_llm_capi.a" ]]; then
exit 1
fi

# The Dockerfile is copied into the GAIE checkout under the name
# Dockerfile.dynamo, so verify (and report) that destination path rather than
# the source file name Dockerfile.epp.
if [[ ! -f "${GAIE_DIR}/Dockerfile.dynamo" ]]; then
    echo "Dockerfile.dynamo file copy failed!"
    exit 1
fi

echo "Files copied successfully:"
echo " Header: ${DYNAMO_INCLUDE_DIR}/llm_engine.h"
echo " Library: ${DYNAMO_LIB_DIR}/libdynamo_llm_capi.a"
echo " Docker: ${GAIE_DIR}/Dockerfile.dynamo"

# Step 5: Apply Dynamo patch (if it exists)
echo "🔧 Applying Dynamo patch..."
cd "${EPP_DIR}"
echo "Applying Dynamo patch..."
cd "${GAIE_DIR}"

PATCH_FILE="${DYNAMO_DIR}/deploy/inference-gateway/epp-patches/v0.5.1-2/epp-v0.5.1-dyn2.patch"
if [[ -f "${PATCH_FILE}" ]]; then
Expand All @@ -89,7 +96,7 @@ else
fi

# Step 6: Build the EPP image
echo "Building the EPP image..."
echo "Building the custom EPP image for GAIE..."
make dynamo-image-local-load

echo "EPP with Dynamo KV routing built"
echo "EPP image with Dynamo KV routing built"
Original file line number Diff line number Diff line change
@@ -1,75 +1,3 @@
diff --git a/Dockerfile.dynamo b/Dockerfile.dynamo
new file mode 100644
index 0000000..3f0e0a0
--- /dev/null
+++ b/Dockerfile.dynamo
@@ -0,0 +1,66 @@
+# Dockerfile.dynamo - Custom Dockerfile for Dynamo FFI plugin
+ARG BUILDER_IMAGE=golang:1.24
+ARG BASE_IMAGE=ubuntu:22.04
+
+############################
+# Builder
+############################
+FROM ${BUILDER_IMAGE} AS builder
+
+ENV CGO_ENABLED=1
+ENV GOOS=linux
+ENV GOARCH=amd64
+# be explicit; helps cgo when linking libstdc++
+ENV CC=gcc
+ENV CXX=g++
+
+# C/C++ toolchain for cgo, and libstdc++ for link-time
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ build-essential \
+ gcc g++ \
+ libc6-dev \
+ ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+ARG COMMIT_SHA=unknown
+ARG BUILD_REF
+
+WORKDIR /src
+
+# deps first (cache)
+COPY go.mod go.sum ./
+RUN go mod download
+
+# source
+COPY cmd/epp ./cmd/epp
+COPY pkg/epp ./pkg/epp
+COPY internal ./internal
+COPY api ./api
+
+# sanity (optional)
+RUN ls -la pkg/epp/scheduling/plugins/dynamo_kv_scorer/include/ || echo "Headers not found"
+RUN ls -la pkg/epp/scheduling/plugins/dynamo_kv_scorer/lib/ || echo "Library not found"
+
+# build
+WORKDIR /src/cmd/epp
+RUN go build \
+ -ldflags="-X sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics.CommitSHA=${COMMIT_SHA} -X sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics.BuildRef=${BUILD_REF}" \
+ -o /epp
+
+############################
+# Runtime
+############################
+FROM ${BASE_IMAGE} AS runtime
+
+# Minimal runtime deps; include libstdc++ runtime for -lstdc++
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ libstdc++6 \
+ && rm -rf /var/lib/apt/lists/* \
+ && groupadd -r nonroot && useradd -r -g nonroot nonroot
+
+WORKDIR /
+COPY --from=builder /epp /epp
+
+USER nonroot:nonroot
+ENTRYPOINT ["/epp"]
diff --git a/Makefile b/Makefile
index dee7e99..4679ce2 100644
--- a/Makefile
Expand Down Expand Up @@ -647,7 +575,7 @@ index 0000000..1f6a41f
+)
+
+func loadDynamoConfig() {
+ ffiNamespace = getEnvOrDefault("DYN_NAMESPACE", "vllm-agg")
+ ffiNamespace = getEnvOrDefault("DYNAMO_NAMESPACE", "vllm-agg")
+ ffiComponent = getEnvOrDefault("DYNAMO_COMPONENT", "backend")
+ ffiModel = getEnvOrDefault("DYNAMO_MODEL", "Qwen/Qwen3-0.6B")
+ ffiWorkerID = getEnvInt64OrDefault("DYNAMO_WORKER_ID", 1)
Expand Down
Loading