diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index f87095f5ba..a82b03ca36 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -249,9 +249,9 @@ The following table list the configurable parameters of the chart. | `inferenceExtension.tracing.sampling.samplerArg` | Sampler-specific argument. For `parentbased_traceidratio`, this defines the base sampling rate for new traces (root spans), as a float string in the range [0.0, 1.0]. For example, "0.1" enables 10% sampling. | | `inferenceExtension.volumes` | List of volumes to mount in the EPP deployment as free-form YAML. Optional. | | `inferenceExtension.volumeMounts` | List of volume mounts for the EPP container as free-form YAML. Optional. | -| `experimentalGateway.enabled` | Boolean flag to indicate whether the helm chart should create HttpRoute. Defaults to `False`. Optional. | -| `experimentalGateway.name` | Name of the inference-gateway to be used when creating the HttpRoute. Used only if `experimentalGateway` is enabled. Optional. | -| `experimentalGateway.baseModel` | Base model used in the current instance of the epp. When this value is set the HttpRoute will be set to match the pool based on `X-Gateway-Base-Model-Name` header. Optional. | +| `experimentalHttpRoute.enabled` | Boolean flag indicating whether the Helm chart should create an HTTPRoute. Defaults to `false`. Optional. | +| `experimentalHttpRoute.inferenceGatewayName` | Name of the inference gateway that the HTTPRoute references as its parent. Used only if `experimentalHttpRoute.enabled` is `true`. Defaults to `inference-gateway`. Optional. | +| `experimentalHttpRoute.baseModel` | Base model used in the current instance of the EPP. When this value is set, the HTTPRoute matches requests to the pool on an exact `X-Gateway-Base-Model-Name` header value. Optional. | | `inferenceExtension.sidecar.enabled` | Enables or disables the sidecar container in the EPP deployment. Defaults to `false`. 
| | `inferenceExtension.sidecar.name` | Name of the sidecar container. Required when the sidecar is enabled. | | `inferenceExtension.sidecar.image` | Image for the sidecar container. Required when the sidecar is enabled. | diff --git a/config/charts/inferencepool/templates/httproute.yaml b/config/charts/inferencepool/templates/httproute.yaml index 0ab5cddd19..a280d1581b 100644 --- a/config/charts/inferencepool/templates/httproute.yaml +++ b/config/charts/inferencepool/templates/httproute.yaml @@ -1,4 +1,4 @@ -{{- if .Values.experimentalGateway.enabled }} +{{- if .Values.experimentalHttpRoute.enabled }} apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: @@ -8,7 +8,7 @@ spec: parentRefs: - group: gateway.networking.k8s.io kind: Gateway - name: {{ .Values.experimentalGateway.name }} + name: {{ .Values.experimentalHttpRoute.inferenceGatewayName }} rules: - backendRefs: - group: inference.networking.k8s.io @@ -18,11 +18,11 @@ spec: - path: type: PathPrefix value: / - {{- if .Values.experimentalGateway.baseModel }} + {{- if .Values.experimentalHttpRoute.baseModel }} headers: - type: Exact name: X-Gateway-Base-Model-Name - value: {{ .Values.experimentalGateway.baseModel }} + value: {{ .Values.experimentalHttpRoute.baseModel }} {{- end }} timeouts: request: 300s diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 5ef45e2a46..c6cc51e8f9 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -185,11 +185,11 @@ provider: # http: # maxRequestsPerConnection: 256000 -# experimentalGateway section is used to deploy httproute as part of the epp helm chart. -# this section should be extracted to a separate chart. -experimentalGateway: +# experimentalHttpRoute section is used to deploy httproute as part of the epp helm chart. +# this section is temporary and should be extracted to a separate chart. 
+experimentalHttpRoute: enabled: false # a flag to indicate whether to create the httproute as part of the chart or not. - name: inference-gateway + inferenceGatewayName: inference-gateway # DEPRECATED and will be removed in v1.3. Instead, use `provider.istio.*`. istio: diff --git a/site-src/_includes/epp.md b/site-src/_includes/epp.md index ddcdd52901..df0a7b6a89 100644 --- a/site-src/_includes/epp.md +++ b/site-src/_includes/epp.md @@ -5,7 +5,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ - --set experimentalGateway.enabled=true \ + --set experimentalHttpRoute.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -17,7 +17,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ - --set experimentalGateway.enabled=true \ + --set experimentalHttpRoute.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -29,7 +29,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ - --set experimentalGateway.enabled=true \ + --set experimentalHttpRoute.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ``` @@ -41,7 +41,7 @@ helm install vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ --set provider.name=$GATEWAY_PROVIDER \ - --set experimentalGateway.enabled=true \ + --set experimentalHttpRoute.enabled=true \ --version $IGW_CHART_VERSION \ oci://registry.k8s.io/gateway-api-inference-extension/charts/inferencepool ```