kubernetes-sigs · ahg-g · Jan 12, 2026 · Jan 11, 2026
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -247,9 +247,11 @@ The following table list the configurable parameters of the chart.
 | `inferenceExtension.tracing.otelExporterEndpoint`          | OpenTelemetry collector endpoint.                                                                                                                                                                                                                  |
 | `inferenceExtension.tracing.sampling.sampler`              | The trace sampler to use. Currently, only `parentbased_traceidratio` is supported. This sampler respects the parent span’s sampling decision when present, and applies the configured ratio for root spans.                                        |
 | `inferenceExtension.tracing.sampling.samplerArg`           | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling.                                    |
-| `inferenceExtension.baseModel`                             | Base model used in the current instance of the epp. When this value is set the HttpRoute will be set to match the pool based on `X-Gateway-Base-Model-Name` header. Optional.                                                                                                                                                                       |
 | `inferenceExtension.volumes`                               | List of volumes to mount in the EPP deployment as free-form YAML. Optional.                                                                                                                                                                       |
 | `inferenceExtension.volumeMounts`                          | List of volume mounts for the EPP container as free-form YAML. Optional.                                                                                                                                                                          |
+| `experimentalGateway.enabled`                             | Boolean flag to indicate whether the helm chart should create HttpRoute. Defaults to `False`. Optional.                                                                                                                                                       |
+| `experimentalGateway.name`                             | Name of the Gateway that the HTTPRoute references in its `parentRefs`. Used only when `experimentalGateway.enabled` is `true`. Defaults to `inference-gateway`. Optional.                                                                            |
+| `experimentalGateway.baseModel`                             | Base model served by the current instance of the EPP. When set, the HTTPRoute rule also requires an exact match on the `X-Gateway-Base-Model-Name` request header. Used only when `experimentalGateway.enabled` is `true`. Optional.                |
 | `inferenceExtension.sidecar.enabled`                       | Enables or disables the sidecar container in the EPP deployment. Defaults to `false`.                                                                                                                                                             |
 | `inferenceExtension.sidecar.name`                          | Name of the sidecar container. Required when the sidecar is enabled.                                                                                                                                                                              |
 | `inferenceExtension.sidecar.image`                         | Image for the sidecar container. Required when the sidecar is enabled.                                                                                                                                                                            |

diff --git a/config/charts/inferencepool/templates/httproute.yaml b/config/charts/inferencepool/templates/httproute.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.experimentalGateway.enabled }}
 apiVersion: gateway.networking.k8s.io/v1
 kind: HTTPRoute
 metadata:
@@ -7,7 +8,7 @@ spec:
   parentRefs:
   - group: gateway.networking.k8s.io
     kind: Gateway
-    name: {{ .Values.inferenceGateway.name }}
+    name: {{ .Values.experimentalGateway.name | quote }}
   rules:
   - backendRefs:
     - group: inference.networking.k8s.io
@@ -17,11 +18,12 @@ spec:
     - path:
         type: PathPrefix
         value: /
-      {{- if .Values.inferenceExtension.baseModel }}
+      {{- if .Values.experimentalGateway.baseModel }}
       headers:
         - type: Exact
           name: X-Gateway-Base-Model-Name
-          value: {{ .Values.inferenceExtension.baseModel }}
+          value: {{ .Values.experimentalGateway.baseModel | quote }}
       {{- end }}
     timeouts:
       request: 300s
+{{- end }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -185,7 +185,10 @@ provider:
         #   http:
         #     maxRequestsPerConnection: 256000
 
-inferenceGateway:
+# The experimentalGateway section controls whether an HTTPRoute is deployed as part of the EPP Helm chart.
+# TODO: extract this section into a separate chart.
+experimentalGateway:
+  enabled: false # set to true to have this chart create the HTTPRoute.
   name: inference-gateway
 
 # DEPRECATED and will be removed in v1.3. Instead, use `provider.istio.*`.

diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md
@@ -5,6 +5,7 @@
       helm install vllm-llama3-8b-instruct \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
+      --set experimentalGateway.enabled=true \
       --version $IGW_CHART_VERSION \
       oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool
       ```
@@ -16,6 +17,7 @@
       helm install vllm-llama3-8b-instruct \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
+      --set experimentalGateway.enabled=true \
       --version $IGW_CHART_VERSION \
       oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool
       ```
@@ -27,6 +29,7 @@
       helm install vllm-llama3-8b-instruct \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
+      --set experimentalGateway.enabled=true \
       --version $IGW_CHART_VERSION \
       oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool
       ```
@@ -38,6 +41,7 @@
       helm install vllm-llama3-8b-instruct \
       --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
       --set provider.name=$GATEWAY_PROVIDER \
+      --set experimentalGateway.enabled=true \
       --version $IGW_CHART_VERSION \
       oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool
       ```