From 8dbacdbc50f7801c5d6de3f7e07cdc952e2e000c Mon Sep 17 00:00:00 2001 From: Nir Rozenbaum Date: Sun, 11 Jan 2026 09:33:21 +0200 Subject: [PATCH] changed httproute creation to be behind a flag. in the long term this should be moved to a separate chart. Signed-off-by: Nir Rozenbaum --- config/charts/inferencepool/README.md | 4 +++- config/charts/inferencepool/templates/httproute.yaml | 8 +++++--- config/charts/inferencepool/values.yaml | 5 ++++- site-src/_includes/epp.md | 4 ++++ 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 9910fd402a..f87095f5ba 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -247,9 +247,11 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.tracing.otelExporterEndpoint` | OpenTelemetry collector endpoint. | | `inferenceExtension.tracing.sampling.sampler` | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans. | | `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. | -| `inferenceExtension.baseModel` | Base model used in the current instance of the epp. When this value is set the HttpRoute will be set to match the pool based on `X-Gateway-Base-Model-Name` header. Optional. | | `inferenceExtension.volumes` | List of volumes to mount in the EPP deployment as free-form YAML. Optional. | | `inferenceExtension.volumeMounts` | List of volume mounts for the EPP container as free-form YAML. Optional. | +| `experimentalGateway.enabled` | Boolean flag to indicate whether the helm chart should create HttpRoute. 
Defaults to `false`. Optional. | +| `experimentalGateway.name` | Name of the inference-gateway to be used when creating the HttpRoute. Used only if `experimentalGateway.enabled` is `true`. Optional. | +| `experimentalGateway.baseModel` | Base model used in the current instance of the epp. When this value is set the HttpRoute will be set to match the pool based on `X-Gateway-Base-Model-Name` header. Optional. | | `inferenceExtension.sidecar.enabled` | Enables or disables the sidecar container in the EPP deployment. Defaults to `false`. | | `inferenceExtension.sidecar.name` | Name of the sidecar container. Required when the sidecar is enabled. | | `inferenceExtension.sidecar.image` | Image for the sidecar container. Required when the sidecar is enabled. | diff --git a/config/charts/inferencepool/templates/httproute.yaml b/config/charts/inferencepool/templates/httproute.yaml index 50cf8cb257..0ab5cddd19 100644 --- a/config/charts/inferencepool/templates/httproute.yaml +++ b/config/charts/inferencepool/templates/httproute.yaml @@ -1,3 +1,4 @@ +{{- if .Values.experimentalGateway.enabled }} apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: @@ -7,7 +8,7 @@ spec: parentRefs: - group: gateway.networking.k8s.io kind: Gateway - name: {{ .Values.inferenceGateway.name }} + name: {{ .Values.experimentalGateway.name }} rules: - backendRefs: - group: inference.networking.k8s.io @@ -17,11 +18,12 @@ spec: - path: type: PathPrefix value: / - {{- if .Values.inferenceExtension.baseModel }} + {{- if .Values.experimentalGateway.baseModel }} headers: - type: Exact name: X-Gateway-Base-Model-Name - value: {{ .Values.inferenceExtension.baseModel }} + value: {{ .Values.experimentalGateway.baseModel }} {{- end }} timeouts: request: 300s +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 2041dfcfb8..c6d58e753c 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -185,7 
+185,10 @@ provider: # http: # maxRequestsPerConnection: 256000 -inferenceGateway: +# experimentalGateway section is used to deploy httproute as part of the epp helm chart. +# this section should be extracted to a separate chart. +experimentalGateway: + enabled: false # a flag to indicate whether to create the httproute as part of the chart or not. name: inference-gateway # DEPRECATED and will be removed in v1.3. Instead, use `provider.istio.*`. diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md index a82ca68d3e..ddcdd52901 100644 --- a/site-src/_includes/epp.md +++ b/site-src/_includes/epp.md @@ -5,6 +5,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ + --set experimentalGateway.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -16,6 +17,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ + --set experimentalGateway.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -27,6 +29,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ + --set experimentalGateway.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -38,6 +41,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ + --set experimentalGateway.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ```