diff --git a/conformance/resources/manifests/manifests.yaml b/conformance/resources/manifests/manifests.yaml index e25c29466..a836ece12 100644 --- a/conformance/resources/manifests/manifests.yaml +++ b/conformance/resources/manifests/manifests.yaml @@ -4,7 +4,6 @@ # individual test files or other resource directories (e.g., sample_backends). --- -# Namespace for core infrastructure like Gateways. apiVersion: v1 kind: Namespace metadata: @@ -12,8 +11,6 @@ metadata: labels: gateway-conformance: infra --- -# Namespace for application backends (potentially simulating model servers -# or where InferencePools might reside in some tests). apiVersion: v1 kind: Namespace metadata: @@ -21,8 +18,6 @@ metadata: labels: gateway-conformance: backend --- -# Namespace for simple web server backends. This is expected by -# the upstream conformance suite's Setup method. apiVersion: v1 kind: Namespace metadata: @@ -30,31 +25,24 @@ metadata: labels: gateway-conformance: web-backend --- -# A basic Gateway resource that allows HTTPRoutes from the same namespace. -# Tests can use this as a parent reference for routes that target InferencePools. apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: conformance-gateway namespace: gateway-conformance-infra spec: - # The conformance suite runner will replace this placeholder - # with the actual GatewayClass name provided via flags. gatewayClassName: "{GATEWAY_CLASS_NAME}" listeners: - - name: http # Standard listener name + - name: http port: 80 protocol: HTTP allowedRoutes: namespaces: from: All kinds: - # Allows HTTPRoutes to attach, which can then reference InferencePools. - group: gateway.networking.k8s.io kind: HTTPRoute --- -# --- Conformance Secondary Gateway Definition --- -# A second generic Gateway resource for tests requiring multiple Gateways. 
apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: @@ -66,7 +54,95 @@ spec: - name: http port: 80 protocol: HTTP - hostname: "secondary.example.com" # Distinct hostname to differentiate from conformance-gateway + hostname: "secondary.example.com" allowedRoutes: namespaces: from: All +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: inference-model-1 + namespace: gateway-conformance-app-backend + labels: + app: inference-model-1 +spec: + replicas: 1 + selector: + matchLabels: + app: inference-model-1 + template: + metadata: + labels: + app: inference-model-1 + spec: + containers: + - name: echo-basic-1 + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - name: http + containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: inference-model-2 + namespace: gateway-conformance-app-backend + labels: + app: inference-model-2 +spec: + replicas: 1 + selector: + matchLabels: + app: inference-model-2 + template: + metadata: + labels: + app: inference-model-2 + spec: + containers: + - name: echo-basic-2 + image: gcr.io/k8s-staging-gateway-api/echo-basic:v20240412-v1.0.0-394-g40c666fd + ports: + - name: http + containerPort: 3000 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 3 + periodSeconds: 5 + failureThreshold: 2 + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP diff --git a/conformance/tests/basic/inferencepool_multiple_rules_different_pools.go 
b/conformance/tests/basic/inferencepool_multiple_rules_different_pools.go new file mode 100644 index 000000000..a3ff98674 --- /dev/null +++ b/conformance/tests/basic/inferencepool_multiple_rules_different_pools.go @@ -0,0 +1,91 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package basic + +import ( + "testing" + + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/gateway-api/conformance/utils/suite" + "sigs.k8s.io/gateway-api/pkg/features" + + "sigs.k8s.io/gateway-api-inference-extension/conformance/tests" + k8sutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/kubernetes" + trafficutils "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/traffic" +) + +// init appends HTTPRouteMultipleRulesDifferentPools to tests.ConformanceTests. +func init() { + tests.ConformanceTests = append(tests.ConformanceTests, HTTPRouteMultipleRulesDifferentPools) +} + +// HTTPRouteMultipleRulesDifferentPools checks that an HTTPRoute with one rule per +// InferencePool sends requests for /primary and /secondary to the Pod backing +// the matching pool. +var HTTPRouteMultipleRulesDifferentPools = suite.ConformanceTest{ + ShortName: "HTTPRouteMultipleRulesDifferentPools", + Description: "An HTTPRoute with two rules routing to two different InferencePools", + Manifests: []string{"tests/basic/inferencepool_multiple_rules_different_pools.yaml"}, + Features: []features.FeatureName{ + features.SupportGateway, + features.SupportHTTPRoute, + features.FeatureName("SupportInferencePool"), + }, + Test: func(t *testing.T, s *suite.ConformanceTestSuite) { + const ( + appBackendNamespace = "gateway-conformance-app-backend" + infraNamespace = "gateway-conformance-infra" + + 
poolPrimaryName = "pool-primary" + poolSecondaryName = "pool-secondary" + routeName = "httproute-multiple-rules-different-pools" + gatewayName = "conformance-gateway" + + backendPrimaryLabelValue = "inference-model-1" + backendSecondaryLabelValue = "inference-model-2" + backendAppLabelKey = "app" + + primaryPath = "/primary" + secondaryPath = "/secondary" + ) + + primaryPoolNN := types.NamespacedName{Name: poolPrimaryName, Namespace: appBackendNamespace} + secondaryPoolNN := types.NamespacedName{Name: poolSecondaryName, Namespace: appBackendNamespace} + routeNN := types.NamespacedName{Name: routeName, Namespace: appBackendNamespace} + gatewayNN := types.NamespacedName{Name: gatewayName, Namespace: infraNamespace} + + t.Run("Wait for resources to be accepted", func(t *testing.T) { + k8sutils.HTTPRouteAndInferencePoolMustBeAcceptedAndRouteAccepted(t, s.Client, routeNN, gatewayNN, primaryPoolNN) + k8sutils.HTTPRouteAndInferencePoolMustBeAcceptedAndRouteAccepted(t, s.Client, routeNN, gatewayNN, secondaryPoolNN) + }) + + t.Run("Traffic should be routed to the correct pool based on path", func(t *testing.T) { + primarySelector := labels.SelectorFromSet(labels.Set{backendAppLabelKey: backendPrimaryLabelValue}) + secondarySelector := labels.SelectorFromSet(labels.Set{backendAppLabelKey: backendSecondaryLabelValue}) + + // Look up one Pod per backend label so each request can be pinned to, and checked against, a known Pod. + primaryPod := k8sutils.GetPod(t, s.Client, appBackendNamespace, primarySelector, s.TimeoutConfig.RequestTimeout) + secondaryPod := k8sutils.GetPod(t, s.Client, appBackendNamespace, secondarySelector, s.TimeoutConfig.RequestTimeout) + + gwAddr := k8sutils.GetGatewayEndpoint(t, s.Client, s.TimeoutConfig, gatewayNN) + + t.Run("request to primary pool", func(t *testing.T) { + trafficutils.MakeRequestAndExpectResponseFromPod(t, s.RoundTripper, s.TimeoutConfig, gwAddr, primaryPath, primaryPod) + }) + + t.Run("request to secondary pool", func(t *testing.T) { + trafficutils.MakeRequestAndExpectResponseFromPod(t, s.RoundTripper, s.TimeoutConfig, gwAddr, 
secondaryPath, secondaryPod) + }) + }) + }, +} diff --git a/conformance/tests/basic/inferencepool_multiple_rules_different_pools.yaml b/conformance/tests/basic/inferencepool_multiple_rules_different_pools.yaml new file mode 100644 index 000000000..de3427434 --- /dev/null +++ b/conformance/tests/basic/inferencepool_multiple_rules_different_pools.yaml @@ -0,0 +1,150 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: pool-primary-epp + namespace: gateway-conformance-app-backend +spec: + selector: + app: pool-primary-epp + ports: + - name: grpc + port: 9002 + targetPort: 9002 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pool-primary-epp + namespace: gateway-conformance-app-backend + labels: + app: pool-primary-epp +spec: + replicas: 1 + selector: + matchLabels: + app: pool-primary-epp + template: + metadata: + labels: + app: pool-primary-epp + spec: + containers: + - name: epp + # TODO(#996) Switch to use a released version of the image instead of epp:main + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main + imagePullPolicy: Always + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING + value: "true" + args: + - "-poolName" + - "pool-primary" + - "-poolNamespace" + - "gateway-conformance-app-backend" + - -grpcPort + - "9002" + - -grpcHealthPort + - "9003" +--- +apiVersion: v1 +kind: Service +metadata: + name: pool-secondary-epp + namespace: gateway-conformance-app-backend +spec: + selector: + app: pool-secondary-epp + ports: + - name: grpc + port: 9002 + targetPort: 9002 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pool-secondary-epp + namespace: gateway-conformance-app-backend + labels: + app: pool-secondary-epp +spec: + replicas: 1 + selector: + matchLabels: + app: pool-secondary-epp + template: + metadata: + labels: + app: pool-secondary-epp + spec: + containers: + - name: epp + image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main 
+ imagePullPolicy: Always + env: + - name: ENABLE_REQ_HEADER_BASED_SCHEDULER_FOR_TESTING + value: "true" + args: + - "-poolName" + - "pool-secondary" + - "-poolNamespace" + - "gateway-conformance-app-backend" + - -grpcPort + - "9002" + - -grpcHealthPort + - "9003" +--- +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: pool-primary + namespace: gateway-conformance-app-backend +spec: + selector: + app: inference-model-1 + targetPortNumber: 3000 + extensionRef: + name: pool-primary-epp + portNumber: 9002 +--- +apiVersion: inference.networking.x-k8s.io/v1alpha2 +kind: InferencePool +metadata: + name: pool-secondary + namespace: gateway-conformance-app-backend +spec: + selector: + app: inference-model-2 + targetPortNumber: 3000 + extensionRef: + name: pool-secondary-epp + portNumber: 9002 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: httproute-multiple-rules-different-pools + namespace: gateway-conformance-app-backend +spec: + parentRefs: + - name: conformance-gateway + namespace: gateway-conformance-infra + rules: + - matches: + - path: + type: PathPrefix + value: /primary + backendRefs: + - name: pool-primary + kind: InferencePool + group: inference.networking.x-k8s.io + port: 80 + - matches: + - path: + type: PathPrefix + value: /secondary + backendRefs: + - name: pool-secondary + kind: InferencePool + group: inference.networking.x-k8s.io + port: 80 diff --git a/conformance/utils/config/timing.go b/conformance/utils/config/timing.go index f5d4eeb52..f520ec702 100644 --- a/conformance/utils/config/timing.go +++ b/conformance/utils/config/timing.go @@ -29,8 +29,8 @@ type InferenceExtensionTimeoutConfig struct { // All fields from gatewayconfig.TimeoutConfig will be available directly. gatewayconfig.TimeoutConfig - // InferencePoolMustHaveConditionTimeout represents the maximum time to wait for an InferencePool to have a specific condition. 
- InferencePoolMustHaveConditionTimeout time.Duration + // GeneralMustHaveConditionTimeout represents the maximum time to wait for an InferencePool, HTTPRoute or other resource to have a specific condition. + GeneralMustHaveConditionTimeout time.Duration // InferencePoolMustHaveConditionInterval represents the polling interval for checking an InferencePool's condition. InferencePoolMustHaveConditionInterval time.Duration @@ -38,16 +38,15 @@ type InferenceExtensionTimeoutConfig struct { // GatewayObjectPollInterval is the polling interval used when waiting for a Gateway object to appear. GatewayObjectPollInterval time.Duration - // HTTPRouteDeletionReconciliationTimeout is the time to wait for controllers to reconcile - // state after an HTTPRoute is deleted, before checking dependent resources or traffic. + // HTTPRouteDeletionReconciliationTimeout is the time to wait for controllers to reconcile state after an HTTPRoute is deleted, before checking dependent resources or traffic. HTTPRouteDeletionReconciliationTimeout time.Duration } // DefaultInferenceExtensionTimeoutConfig returns a new InferenceExtensionTimeoutConfig with default values. 
func DefaultInferenceExtensionTimeoutConfig() InferenceExtensionTimeoutConfig { return InferenceExtensionTimeoutConfig{ - TimeoutConfig: gatewayconfig.DefaultTimeoutConfig(), // Initialize embedded struct - InferencePoolMustHaveConditionTimeout: 300 * time.Second, + TimeoutConfig: gatewayconfig.DefaultTimeoutConfig(), + GeneralMustHaveConditionTimeout: 300 * time.Second, InferencePoolMustHaveConditionInterval: 10 * time.Second, GatewayObjectPollInterval: 5 * time.Second, HTTPRouteDeletionReconciliationTimeout: 5 * time.Second, diff --git a/conformance/utils/kubernetes/helpers.go b/conformance/utils/kubernetes/helpers.go index 2e866ca62..f5cef755f 100644 --- a/conformance/utils/kubernetes/helpers.go +++ b/conformance/utils/kubernetes/helpers.go @@ -23,20 +23,19 @@ import ( "fmt" "reflect" "testing" + "time" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" - // Import the Inference Extension API types - inferenceapi "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" // Adjust if your API version is different - - // Import local config for Inference Extension + inferenceapi "sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2" "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config" - // Import necessary utilities from the core Gateway API conformance suite gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" gatewayapiconfig "sigs.k8s.io/gateway-api/conformance/utils/config" gatewayk8sutils "sigs.k8s.io/gateway-api/conformance/utils/kubernetes" @@ -81,7 +80,7 @@ func InferencePoolMustHaveCondition(t *testing.T, c client.Client, poolNN types. 
waitErr := wait.PollUntilContextTimeout( context.Background(), timeoutConfig.InferencePoolMustHaveConditionInterval, - timeoutConfig.InferencePoolMustHaveConditionTimeout, + timeoutConfig.GeneralMustHaveConditionTimeout, true, func(ctx context.Context) (bool, error) { pool := &inferenceapi.InferencePool{} // This is the type instance used for Get err := c.Get(ctx, poolNN, pool) @@ -172,7 +171,7 @@ func InferencePoolMustHaveNoParents(t *testing.T, c client.Client, poolNN types. ctx, timeoutConfig.InferencePoolMustHaveConditionInterval, - timeoutConfig.InferencePoolMustHaveConditionTimeout, + timeoutConfig.GeneralMustHaveConditionTimeout, true, func(pollCtx context.Context) (bool, error) { pool := &inferenceapi.InferencePool{} @@ -257,6 +256,43 @@ func InferencePoolMustBeAcceptedByParent(t *testing.T, c client.Client, poolNN t t.Logf("InferencePool %s is Accepted by a parent Gateway (Reason: %s)", poolNN.String(), gatewayv1.GatewayReasonAccepted) } +// InferencePoolMustBeRouteAccepted waits for the specified InferencePool resource +// to exist and report an Accepted condition with Type=RouteConditionAccepted, +// Status=True, and Reason=RouteReasonAccepted within one of its parent statuses. 
+func InferencePoolMustBeRouteAccepted(t *testing.T, c client.Client, poolNN types.NamespacedName) { + t.Helper() + + expectedPoolCondition := metav1.Condition{ + Type: string(gatewayv1.RouteConditionAccepted), + Status: metav1.ConditionTrue, + Reason: string(gatewayv1.RouteReasonAccepted), + } + + // Call the existing generic helper with the predefined condition + InferencePoolMustHaveCondition(t, c, poolNN, expectedPoolCondition) + t.Logf("InferencePool %s successfully verified with RouteAccepted condition (Type: %s, Status: %s, Reason: %s).", + poolNN.String(), expectedPoolCondition.Type, expectedPoolCondition.Status, expectedPoolCondition.Reason) +} + +// HTTPRouteAndInferencePoolMustBeAcceptedAndRouteAccepted waits for the specified HTTPRoute +// to be Accepted and have its references resolved by the specified Gateway, +// AND for the specified InferencePool to be "RouteAccepted" using the specific +// RouteConditionAccepted criteria. +func HTTPRouteAndInferencePoolMustBeAcceptedAndRouteAccepted( + t *testing.T, + c client.Client, + routeNN types.NamespacedName, + gatewayNN types.NamespacedName, + poolNN types.NamespacedName) { + t.Helper() + var timeoutConfig config.InferenceExtensionTimeoutConfig = config.DefaultInferenceExtensionTimeoutConfig() + + HTTPRouteMustBeAcceptedAndResolved(t, c, timeoutConfig.TimeoutConfig, routeNN, gatewayNN) + InferencePoolMustBeRouteAccepted(t, c, poolNN) + t.Logf("Successfully verified: HTTPRoute %s (Gateway %s) is Accepted & Resolved, and InferencePool %s is RouteAccepted.", + routeNN.String(), gatewayNN.String(), poolNN.String()) +} + // GetGatewayEndpoint waits for the specified Gateway to have at least one address // and returns the address in "host:port" format. // It leverages the upstream Gateway API's WaitForGatewayAddress. 
@@ -271,3 +307,43 @@ func GetGatewayEndpoint(t *testing.T, k8sClient client.Client, timeoutConfig gat t.Logf("Gateway %s/%s has address: %s", gatewayNN.Namespace, gatewayNN.Name, gwAddr) return gwAddr } + +// GetPod polls once per second, for up to timeout, until a Pod matching selector exists in +// namespace, is in the Running phase, and has an IP address assigned. Only the first Pod of +// the list result is checked and returned when there are multiple matches. List errors are +// logged and retried; the test fails if no such Pod is observed before the timeout. +// TODO(#1003) combine with GetPodsWithLabel that is being introduced in PR #961 +func GetPod(t *testing.T, c client.Client, namespace string, selector labels.Selector, timeout time.Duration) *corev1.Pod { + t.Helper() + + var pods corev1.PodList + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + waitErr := wait.PollUntilContextTimeout(ctx, 1*time.Second, timeout, true, func(ctx context.Context) (bool, error) { + if err := c.List(ctx, &pods, &client.ListOptions{ + LabelSelector: selector, + Namespace: namespace, + }); err != nil { + t.Logf("Error listing pods with selector %s: %v. Retrying.", selector.String(), err) + return false, nil + } + + if len(pods.Items) > 0 { + pod := pods.Items[0] + if pod.Status.PodIP != "" && pod.Status.Phase == corev1.PodRunning { + return true, nil + } + t.Logf("Pod %s found, but not yet running or has no IP. Current phase: %s, IP: '%s'. Retrying.", pod.Name, pod.Status.Phase, pod.Status.PodIP) + } else { + t.Logf("No pods found with selector %s yet. 
Retrying.", selector.String()) + } + return false, nil + }) + + require.NoErrorf(t, waitErr, "timed out waiting for Pod with selector %s in namespace %s to be ready", selector.String(), namespace) + require.NotEmpty(t, pods.Items, "expected at least one pod for selector %s in namespace %s, but found none", selector.String(), namespace) + + pod := &pods.Items[0] + t.Logf("Successfully found ready Pod %s with IP %s for selector %s", pod.Name, pod.Status.PodIP, selector.String()) + return pod +} diff --git a/conformance/utils/traffic/traffic.go b/conformance/utils/traffic/traffic.go index e65f45fb9..07f944919 100644 --- a/conformance/utils/traffic/traffic.go +++ b/conformance/utils/traffic/traffic.go @@ -17,9 +17,11 @@ limitations under the License. package traffic import ( + "fmt" "net/http" "testing" + corev1 "k8s.io/api/core/v1" gwconfig "sigs.k8s.io/gateway-api/conformance/utils/config" gwhttp "sigs.k8s.io/gateway-api/conformance/utils/http" "sigs.k8s.io/gateway-api/conformance/utils/roundtripper" @@ -103,3 +105,28 @@ func MakeRequestAndExpectNotFound( ) gwhttp.MakeRequestAndExpectEventuallyConsistentResponse(t, r, timeoutConfig, gatewayAddress, expectedResponse) } + +// MakeRequestAndExpectResponseFromPod sends a request to the specified path by IP address and +// uses a special "test-epp-endpoint-selection" header to target a specific backend Pod. +// It then verifies that the response was served by that Pod. 
+func MakeRequestAndExpectResponseFromPod(t *testing.T, r roundtripper.RoundTripper, timeoutConfig gwconfig.TimeoutConfig, gwAddr, path string, targetPod *corev1.Pod) { + t.Helper() + + const ( + eppSelectionHeader = "test-epp-endpoint-selection" + backendPort = 3000 + ) + + expectedResponse := gwhttp.ExpectedResponse{ + Request: gwhttp.Request{ + Path: path, + Headers: map[string]string{ + eppSelectionHeader: fmt.Sprintf("%s:%d", targetPod.Status.PodIP, backendPort), + }, + }, + Backend: targetPod.Name, + Namespace: targetPod.Namespace, + } + + gwhttp.MakeRequestAndExpectEventuallyConsistentResponse(t, r, timeoutConfig, gwAddr, expectedResponse) +}