diff --git a/test/e2e/upgrade/upgrade.go b/test/e2e/upgrade/upgrade.go index 80824f7d97f2..ea1f2474d34e 100644 --- a/test/e2e/upgrade/upgrade.go +++ b/test/e2e/upgrade/upgrade.go @@ -44,6 +44,7 @@ import ( "github.com/openshift/origin/test/e2e/upgrade/manifestdelete" mc "github.com/openshift/origin/test/extended/machine_config" "github.com/openshift/origin/test/extended/prometheus" + "github.com/openshift/origin/test/extended/router" "github.com/openshift/origin/test/extended/util/disruption" "github.com/openshift/origin/test/extended/util/operator" ) @@ -69,6 +70,7 @@ func AllTests() []upgrades.Test { &prometheus.ImagePullsAreFast{}, &prometheus.MetricsAvailableAfterUpgradeTest{}, &dns.UpgradeTest{}, + &router.GatewayAPIUpgradeTest{}, } } diff --git a/test/extended/router/gatewayapi_upgrade.go b/test/extended/router/gatewayapi_upgrade.go new file mode 100644 index 000000000000..a2bf8b45ecb4 --- /dev/null +++ b/test/extended/router/gatewayapi_upgrade.go @@ -0,0 +1,306 @@ +package router + +import ( + "context" + "fmt" + "strings" + "time" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + + exutil "github.com/openshift/origin/test/extended/util" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + e2e "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/upgrades" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" +) + +// GatewayAPIUpgradeTest verifies that Gateway API resources work during upgrade, +// whether using OLM-based or CIO-based (NO-OLM) provisioning +type GatewayAPIUpgradeTest struct { + oc *exutil.CLI + namespace string + gatewayName string + routeName string + hostname string + startedWithNoOLM bool // tracks if GatewayAPIWithoutOLM was enabled at start + loadBalancerSupported bool + managedDNS bool +} + +func (t *GatewayAPIUpgradeTest) Name() string { + return "gateway-api-upgrade" +} + +func (t *GatewayAPIUpgradeTest) DisplayName() string { + return "[sig-network-edge][Feature:Router][apigroup:gateway.networking.k8s.io] Verify Gateway API functionality during upgrade" +} + +// Setup creates Gateway and HTTPRoute resources and tests connectivity +func (t *GatewayAPIUpgradeTest) Setup(ctx context.Context, f *e2e.Framework) { + g.By("Setting up Gateway API upgrade test") + + t.oc = exutil.NewCLIWithFramework(f).AsAdmin() + t.namespace = f.Namespace.Name + t.gatewayName = "upgrade-test-gateway" + t.routeName = "test-httproute" + + // Check platform support and get capabilities (LoadBalancer, DNS) + t.loadBalancerSupported, t.managedDNS = checkPlatformSupportAndGetCapabilities(t.oc) + + g.By("Checking if GatewayAPIWithoutOLM feature gate is enabled before upgrade") + t.startedWithNoOLM = isNoOLMFeatureGateEnabled(t.oc) + + // Skip on clusters missing OLM/Marketplace capabilities if starting with OLM mode + if !t.startedWithNoOLM { + exutil.SkipIfMissingCapabilities(t.oc, olmCapabilities...) + } + + if t.startedWithNoOLM { + e2e.Logf("Starting with GatewayAPIWithoutOLM enabled (NO-OLM mode)") + } else { + e2e.Logf("Starting with OLM-based Gateway API provisioning") + } + + g.By("Creating default GatewayClass to trigger Gateway API installation") + gatewayClassControllerName := "openshift.io/gateway-controller/v1" + gatewayClass := buildGatewayClass(gatewayClassName, gatewayClassControllerName) + _, err := t.oc.AdminGatewayApiClient().GatewayV1().GatewayClasses().Create(ctx, gatewayClass, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + e2e.Failf("Failed to create GatewayClass %q: %v", gatewayClassName, err) + } + + g.By("Waiting for GatewayClass to be accepted") + err = checkGatewayClassCondition(t.oc, gatewayClassName, string(gatewayv1.GatewayClassConditionStatusAccepted), metav1.ConditionTrue) + o.Expect(err).NotTo(o.HaveOccurred(), "GatewayClass %q was not accepted", gatewayClassName) + + g.By("Getting the default domain") + defaultIngressDomain, err := getDefaultIngressClusterDomainName(t.oc, time.Minute) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to find default domain name") + customDomain := strings.Replace(defaultIngressDomain, "apps.", "gw-upgrade.", 1) + t.hostname = "test-upgrade." + customDomain + + g.By("Creating Gateway") + _, err = createAndCheckGateway(t.oc, t.gatewayName, gatewayClassName, customDomain, t.loadBalancerSupported) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to create Gateway") + + g.By("Verify the gateway's LoadBalancer service and DNSRecords") + if t.loadBalancerSupported { + assertGatewayLoadbalancerReady(t.oc, t.gatewayName, t.gatewayName+"-openshift-default") + } + + if t.managedDNS { + assertDNSRecordStatus(t.oc, t.gatewayName) + } + + if !t.startedWithNoOLM { + g.By("Validating OLM-based provisioning before upgrade") + validateOLMBasedOSSM(t.oc, 20*time.Minute) + e2e.Logf("GatewayAPI resources successfully created with OLM-based provisioning") + } else { + g.By("Validating CIO-based (NO-OLM) provisioning before upgrade") + t.validateCIOProvisioning(ctx, false) // false = no migration occurred + e2e.Logf("GatewayAPI resources successfully created with CIO-based (NO-OLM) provisioning") + } + + g.By("Creating HTTPRoute with backend") + backendName := "echo-backend-" + t.gatewayName + createHttpRoute(t.oc, t.gatewayName, t.routeName, t.hostname, backendName) + + g.By("Waiting for HTTPRoute to be accepted") + _, err = assertHttpRouteSuccessful(t.oc, t.gatewayName, t.routeName) + o.Expect(err).NotTo(o.HaveOccurred()) + + if t.loadBalancerSupported && t.managedDNS { + g.By("Verifying HTTP connectivity before upgrade") + assertHttpRouteConnection(t.hostname) + e2e.Logf("HTTPRoute connectivity verified before upgrade") + } +} + +// Test validates that resources continue working during upgrade and validates provisioning method +func (t *GatewayAPIUpgradeTest) Test(ctx context.Context, f *e2e.Framework, done <-chan struct{}, _ upgrades.UpgradeType) { + g.By("Validating Gateway API resources remain functional after upgrade") + + // Block until upgrade completes + g.By("Waiting for upgrade to complete") + <-done + + g.By("Verifying Gateway still exists and is programmed") + _, err := checkGatewayStatus(t.oc, t.gatewayName, ingressNamespace, t.loadBalancerSupported) + o.Expect(err).NotTo(o.HaveOccurred(), "Gateway should remain programmed") + + g.By("Checking if GatewayAPIWithoutOLM feature gate is enabled after upgrade") + endsWithNoOLM := isNoOLMFeatureGateEnabled(t.oc) + + // Determine if migration happened: started with OLM, ended with NO-OLM + migrationOccurred := !t.startedWithNoOLM && endsWithNoOLM + if migrationOccurred { + e2e.Logf("Migration detected: started with OLM, ended with NO-OLM") + } else { + e2e.Logf("No migration occurred (started with NO-OLM=%v, ended with NO-OLM=%v)", t.startedWithNoOLM, endsWithNoOLM) + } + + if endsWithNoOLM { + g.By("GatewayAPIWithoutOLM is enabled - validating CIO-based (NO-OLM) provisioning") + t.validateCIOProvisioning(ctx, migrationOccurred) + } else { + g.By("GatewayAPIWithoutOLM is disabled - validating OLM-based provisioning") + // A shorter timeout here is because the resources should already exist post-upgrade state. + validateOLMBasedOSSM(t.oc, 2*time.Minute) + } + + g.By("Verifying HTTPRoute still exists and is accepted after upgrade") + _, err = assertHttpRouteSuccessful(t.oc, t.gatewayName, t.routeName) + o.Expect(err).NotTo(o.HaveOccurred()) + + if t.loadBalancerSupported && t.managedDNS { + g.By("Verifying HTTP connectivity after upgrade") + assertHttpRouteConnection(t.hostname) + } + + if migrationOccurred { + e2e.Logf("Gateway API successfully migrated from OLM to CIO (NO-OLM) during upgrade") + } else if endsWithNoOLM { + e2e.Logf("Gateway API using CIO-based (NO-OLM) provisioning - no migration occurred") + } else { + e2e.Logf("Gateway API remains on OLM-based provisioning after upgrade") + } +} + +// validateCIOProvisioning validates that Gateway API is using CIO-based (NO-OLM) provisioning +// If migrationOccurred is true, validates the migration from OLM to CIO Sail Library +func (t *GatewayAPIUpgradeTest) validateCIOProvisioning(ctx context.Context, migrationOccurred bool) { + g.By("Verifying Istiod control plane is running") + err := checkIstiodRunning(t.oc, 2*time.Minute) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying CIO has taken ownership via GatewayClass") + // Check GatewayClass has CIO sail library finalizer + err = checkGatewayClassFinalizer(t.oc, gatewayClassName, "openshift.io/ingress-operator-sail-finalizer") + o.Expect(err).NotTo(o.HaveOccurred(), "GatewayClass should have CIO sail library finalizer") + + // Check GatewayClass has required CIO conditions + err = checkGatewayClassCondition(t.oc, gatewayClassName, gatewayClassControllerInstalledConditionType, metav1.ConditionTrue) + o.Expect(err).NotTo(o.HaveOccurred(), "GatewayClass should have ControllerInstalled condition") + + err = checkGatewayClassCondition(t.oc, gatewayClassName, gatewayClassCRDsReadyConditionType, metav1.ConditionTrue) + o.Expect(err).NotTo(o.HaveOccurred(), "GatewayClass should have CRDsReady condition") + + g.By("Verifying istiod deployment is managed by sail library") + err = checkIstiodManagedBySailLibrary(t.oc) + o.Expect(err).NotTo(o.HaveOccurred(), "Istiod should be managed by sail library") + + if migrationOccurred { + g.By("Verifying Istio CRDs remain managed by OLM after migration") + // When migrating from OLM, CRDs were installed by OLM and should remain OLM-managed + err = assertIstioCRDsOwnedByOLM(t.oc) + o.Expect(err).NotTo(o.HaveOccurred(), "Istio CRDs should remain OLM-managed after migration") + + g.By("Verifying OLM subscription still exists after migration") + // The OLM Subscription for Sail Operator should still exist (it's not removed during migration) + _, err = t.oc.Run("get").Args("subscription", "-n", expectedSubscriptionNamespace, expectedSubscriptionName, "-o", "name").Output() + o.Expect(err).NotTo(o.HaveOccurred(), "Sail Operator subscription should still exist after migration") + + g.By("Verifying Istio CR was removed during migration") + out, err := t.oc.Run("get").Args("--ignore-not-found=true", "istio", istioName, "-o", "name").Output() + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(out)).To(o.BeEmpty(), "Istio CR %q should not exist", istioName) + + e2e.Logf("Successfully validated OLM to NO-OLM migration") + } else { + g.By("Verifying Istio CRDs are managed by CIO") + // When using CIO from the start, CRDs are CIO-managed + err := assertIstioCRDsOwnedByCIO(t.oc) + o.Expect(err).NotTo(o.HaveOccurred(), "Istio CRDs should be CIO-managed when using NO-OLM from the start") + + g.By("Verifying CIO-managed resources") + // When using CIO from the start, Istio CR should not exist (CIO uses Sail Library directly) + out, err := t.oc.Run("get").Args("--ignore-not-found=true", "istio", istioName, "-o", "name").Output() + o.Expect(err).NotTo(o.HaveOccurred()) + o.Expect(strings.TrimSpace(out)).To(o.BeEmpty(), "Istio CR %q should not exist when using CIO-based provisioning", istioName) + e2e.Logf("Successfully validated CIO-based (NO-OLM) provisioning") + } +} + +// Teardown cleans up Gateway API resources, Istio CR, and OSSM subscription +// This runs even if the test fails, ensuring complete cleanup +func (t *GatewayAPIUpgradeTest) Teardown(ctx context.Context, f *e2e.Framework) { + g.By("Deleting the Gateway") + err := t.oc.AdminGatewayApiClient().GatewayV1().Gateways(ingressNamespace).Delete(ctx, t.gatewayName, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + e2e.Logf("Failed to delete Gateway %q: %v", t.gatewayName, err) + } + + // Wait for Gateway to be fully deleted before removing GatewayClass + // This prevents orphaned resources if the controller (defined by GatewayClass) is removed + // before Istiod completes cleanup + g.By("Waiting for Gateway to be fully deleted") + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, false, func(ctx context.Context) (bool, error) { + _, err := t.oc.AdminGatewayApiClient().GatewayV1().Gateways(ingressNamespace).Get(ctx, t.gatewayName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + e2e.Logf("Gateway %q successfully deleted", t.gatewayName) + return true, nil + } + return false, nil + }) + if err != nil { + e2e.Logf("Gateway %q still exists after 2 minutes, continuing cleanup anyway", t.gatewayName) + } + + g.By("Deleting the GatewayClass") + err = t.oc.AdminGatewayApiClient().GatewayV1().GatewayClasses().Delete(ctx, gatewayClassName, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + e2e.Logf("Failed to delete GatewayClass %q: %v", gatewayClassName, err) + } + + g.By("Deleting the Istio CR if it exists") + // This should get cleaned up by the CIO, but this is here just in case of failure + err = t.oc.Run("delete").Args("--ignore-not-found=true", "istio", istioName).Execute() + if err != nil { + e2e.Logf("Failed to delete Istio CR %q: %v", istioName, err) + } + + g.By("Waiting for istiod pods to be deleted") + waitForIstiodPodDeletion(t.oc) + + g.By("Deleting the Sail Operator subscription") + // This doesn't get deleted by the CIO, so must manually clean up + err = t.oc.Run("delete").Args("--ignore-not-found=true", "subscription", "-n", expectedSubscriptionNamespace, expectedSubscriptionName).Execute() + if err != nil { + e2e.Logf("Failed to delete Subscription %q: %v", expectedSubscriptionName, err) + } + + g.By("Deleting Sail Operator CSV by label selector") + // Delete CSV using label selector to handle any version (e.g., servicemeshoperator3.v3.2.0) + labelSelector := fmt.Sprintf("operators.coreos.com/%s", serviceMeshOperatorName) + err = t.oc.Run("delete").Args("csv", "-n", expectedSubscriptionNamespace, "-l", labelSelector, "--ignore-not-found=true").Execute() + if err != nil { + e2e.Logf("Failed to delete CSV with label %q: %v", labelSelector, err) + } + + g.By("Deleting OLM-managed Istio CRDs to clean up migration state") + // Delete Istio CRDs so subsequent NO-OLM tests don't see OLM-managed CRDs + // Use LabelSelector to only delete OLM-managed CRDs and suffix check as additional safety + crdList, err := t.oc.AdminApiextensionsClient().ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{ + LabelSelector: "olm.managed=true", + }) + if err != nil { + e2e.Logf("Failed to list OLM-managed CRDs: %v", err) + } else { + for _, crd := range crdList.Items { + if strings.HasSuffix(crd.Name, "istio.io") { + err := t.oc.Run("delete").Args("--ignore-not-found=true", "crd", crd.Name).Execute() + if err != nil { + e2e.Logf("Failed to delete CRD %q: %v", crd.Name, err) + } + } + } + } + + e2e.Logf("Gateway API resources, Istio CR, OSSM subscription, CSV, and Istio CRDs successfully cleaned up") +} diff --git a/test/extended/router/gatewayapicontroller.go b/test/extended/router/gatewayapicontroller.go index 4d77832a0b6f..c047bc6caa41 100644 --- a/test/extended/router/gatewayapicontroller.go +++ b/test/extended/router/gatewayapicontroller.go @@ -19,6 +19,7 @@ import ( operatoringressv1 "github.com/openshift/api/operatoringress/v1" exutil "github.com/openshift/origin/test/extended/util" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -58,6 +59,8 @@ const ( expectedSubscriptionSource = "redhat-operators" // The expected OSSM operator namespace. expectedSubscriptionNamespace = "openshift-operators" + // The expected OSSM operator deployment name. + deploymentOSSMName = "servicemesh-operator3" gatewayClassCRDsReadyConditionType = "CRDsReady" gatewayClassControllerInstalledConditionType = "ControllerInstalled" @@ -106,7 +109,6 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat defer g.GinkgoRecover() var ( oc = exutil.NewCLIWithPodSecurityLevel("gatewayapi-controller", admissionapi.LevelBaseline) - csvName string err error gateways []string infPoolCRD = "https://raw.githubusercontent.com/kubernetes-sigs/gateway-api-inference-extension/main/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml" @@ -116,20 +118,10 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat const ( // gatewayClassControllerName is the name that must be used to create a supported gatewayClass. - gatewayClassControllerName = "openshift.io/gateway-controller/v1" - //OSSM Deployment Pod Name - deploymentOSSMName = "servicemesh-operator3" + gatewayClassControllerName = "openshift.io/gateway-controller/v1" openshiftOperatorsNamespace = "openshift-operators" ) g.BeforeEach(func() { - isokd, err := isOKD(oc) - if err != nil { - e2e.Failf("Failed to get clusterversion to determine if release is OKD: %v", err) - } - if isokd { - g.Skip("Skipping on OKD cluster as OSSM is not available as a community operator") - } - // Check platform support and get capabilities (LoadBalancer, DNS) loadBalancerSupported, managedDNS = checkPlatformSupportAndGetCapabilities(oc) @@ -265,9 +257,9 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat errCheck := checkGatewayClassCondition(oc, gatewayClassName, string(gatewayapiv1.GatewayClassConditionStatusAccepted), metav1.ConditionTrue) o.Expect(errCheck).NotTo(o.HaveOccurred(), "GatewayClass %q was not installed and accepted", gatewayClassName) - g.By("Ensure the istiod Deployment is present and managed by helm") - errCheck = checkIstiodExists(oc, ingressNamespace, istiodDeployment) - o.Expect(errCheck).NotTo(o.HaveOccurred(), "istiod deployment %s does not exist", istiodDeployment) + g.By("Ensure the istiod Deployment is present and running") + errCheck = checkIstiodRunning(oc, 5*time.Minute) + o.Expect(errCheck).NotTo(o.HaveOccurred(), "istiod deployment %s is not running", istiodDeployment) g.By("Check the corresponding Istio CRDs are managed by CIO") err := assertIstioCRDsOwnedByCIO(oc) @@ -282,22 +274,8 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat o.Expect(errCheck).NotTo(o.HaveOccurred(), "GatewayClass %q was not installed and accepted", gatewayClassName) g.By("Confirm the istiod Deployment contains the correct managed label") - waitErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 5*time.Minute, false, func(context context.Context) (bool, error) { - istiod, err := oc.AdminKubeClient().AppsV1().Deployments(ingressNamespace).Get(context, istiodDeployment, metav1.GetOptions{}) - if err != nil { - e2e.Logf("Failed to get istiod deployment %q: %v; retrying...", istiodDeployment, err) - return false, nil - } - labels := istiod.ObjectMeta.Labels - e2e.Logf("the labels are %s", labels) - if value, ok := labels["managed-by"]; ok && value == "sail-library" { - e2e.Logf("The istiod deployment %q is managed by the sail library and has no sail-operator dependencies", istiodDeployment) - return true, nil - } - e2e.Logf("The istiod deployment %q does not have the label, retrying...", istiodDeployment) - return false, nil - }) - o.Expect(waitErr).NotTo(o.HaveOccurred(), "Timed out looking for the label in deployment %q", istiodDeployment) + err := checkIstiodManagedBySailLibrary(oc) + o.Expect(err).NotTo(o.HaveOccurred()) }) @@ -308,72 +286,8 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat g.Skip("Skip this test since it requires OLM resources") } - //check the catalogSource - g.By("Check OLM catalogSource, subscription, CSV and Pod") - waitCatalogErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 20*time.Minute, false, func(context context.Context) (bool, error) { - catalog, err := oc.AsAdmin().Run("get").Args("-n", "openshift-marketplace", "catalogsource", expectedSubscriptionSource, "-o=jsonpath={.status.connectionState.lastObservedState}").Output() - if err != nil { - e2e.Logf("Failed to get CatalogSource %q: %v; retrying...", expectedSubscriptionSource, err) - return false, nil - } - if catalog != "READY" { - e2e.Logf("CatalogSource %q is not in ready state, retrying...", expectedSubscriptionSource) - return false, nil - } - e2e.Logf("CatalogSource %q is ready!", expectedSubscriptionSource) - return true, nil - }) - o.Expect(waitCatalogErr).NotTo(o.HaveOccurred(), "Timed out waiting for CatalogSource %q to become ready", expectedSubscriptionSource) - - // check Subscription - waitVersionErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 20*time.Minute, false, func(context context.Context) (bool, error) { - csvName, err = oc.AsAdmin().Run("get").Args("-n", expectedSubscriptionNamespace, "subscription", expectedSubscriptionName, "-o=jsonpath={.status.installedCSV}").Output() - if err != nil { - e2e.Logf("Failed to get Subscription %q: %v; retrying...", expectedSubscriptionName, err) - return false, nil - } - if csvName == "" { - e2e.Logf("Subscription %q doesn't have installed CSV, retrying...", expectedSubscriptionName) - return false, nil - } - e2e.Logf("Subscription %q has installed CSV: %s", expectedSubscriptionName, csvName) - return true, nil - }) - o.Expect(waitVersionErr).NotTo(o.HaveOccurred(), "Timed out waiting for the ClusterServiceVersion to install") - - waitCSVErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 20*time.Minute, false, func(context context.Context) (bool, error) { - csvStatus, err := oc.AsAdmin().Run("get").Args("-n", expectedSubscriptionNamespace, "clusterserviceversion", csvName, "-o=jsonpath={.status.phase}").Output() - if err != nil { - e2e.Logf("Failed to get ClusterServiceVersion %q: %v; retrying...", csvName, err) - return false, nil - } - if csvStatus != "Succeeded" { - e2e.Logf("Cluster Service Version %q is not successful, retrying...", csvName) - return false, nil - } - e2e.Logf("Cluster Service Version %q has succeeded!", csvName) - return true, nil - }) - o.Expect(waitCSVErr).NotTo(o.HaveOccurred(), "Cluster Service Version %s never reached succeeded status", csvName) - - // get OSSM Operator deployment - waitErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 20*time.Minute, false, func(context context.Context) (bool, error) { - deployOSSM, err := oc.AdminKubeClient().AppsV1().Deployments(expectedSubscriptionNamespace).Get(context, "servicemesh-operator3", metav1.GetOptions{}) - if err != nil { - e2e.Logf("Failed to get OSSM operator deployment %q: %v; retrying...", deploymentOSSMName, err) - return false, nil - } - if deployOSSM.Status.ReadyReplicas < 1 { - e2e.Logf("OSSM operator deployment %q is not ready, retrying...", deploymentOSSMName) - return false, nil - } - e2e.Logf("OSSM operator deployment %q is ready", deploymentOSSMName) - return true, nil - }) - o.Expect(waitErr).NotTo(o.HaveOccurred(), "OSSM Operator deployment %q did not successfully deploy its pod", deploymentOSSMName) - - g.By("Confirm that Istio CR is created and in healthy state") - waitForIstioHealthy(oc) + // Validate OSSM is installed via OLM + validateOLMBasedOSSM(oc, 20*time.Minute) }) g.It("Ensure default gatewayclass is accepted", func() { @@ -427,11 +341,11 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat if !isNoOLMFeatureGateEnabled(oc) { g.By("Confirm that ISTIO CR is created and in healthy state") - waitForIstioHealthy(oc) + waitForIstioHealthy(oc, 20*time.Minute) } g.By("Confirm that the istiod deployment still exists") - errIstio := checkIstiodExists(oc, ingressNamespace, istiodDeployment) + _, errIstio := checkIstiodExists(oc, 5*time.Minute) o.Expect(errIstio).NotTo(o.HaveOccurred(), "istiod deployment %s does not exist", istiodDeployment) }) @@ -636,6 +550,14 @@ var _ = g.Describe("[sig-network-edge][OCPFeatureGate:GatewayAPIController][Feat // checkPlatformSupportAndGetCapabilities verifies the platform is supported and returns // platform capabilities for LoadBalancer services and managed DNS. func checkPlatformSupportAndGetCapabilities(oc *exutil.CLI) (loadBalancerSupported bool, managedDNS bool) { + // Skip on OKD since OSSM is not available as a community operator + // TODO: Determine if we can enable and start testing OKD with Sail Library + isokd, err := isOKD(oc) + o.Expect(err).NotTo(o.HaveOccurred(), "Failed to get clusterversion to determine if release is OKD") + if isokd { + g.Skip("Skipping on OKD cluster as OSSM is not available as a community operator") + } + infra, err := oc.AdminConfigClient().ConfigV1().Infrastructures().Get(context.Background(), "cluster", metav1.GetOptions{}) o.Expect(err).NotTo(o.HaveOccurred()) o.Expect(infra).NotTo(o.BeNil()) @@ -702,8 +624,7 @@ func isNoOLMFeatureGateEnabled(oc *exutil.CLI) bool { return false } -func waitForIstioHealthy(oc *exutil.CLI) { - timeout := 20 * time.Minute +func waitForIstioHealthy(oc *exutil.CLI, timeout time.Duration) { err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, timeout, false, func(context context.Context) (bool, error) { istioStatus, errIstio := oc.AsAdmin().Run("get").Args("istio", istioName, "-o=jsonpath={.status.state}").Output() if errIstio != nil { @@ -882,17 +803,32 @@ func createHttpRoute(oc *exutil.CLI, gwName, routeName, hostname, backendRefname e2e.Failf("Unable to create httpRoute, no gateway available during route assertion %v", errGwStatus) } - // Create the backend (service and pod) needed for the route to have resolvedRefs=true. - // The httproute, service, and pod are cleaned up when the namespace is automatically deleted. - // buildEchoPod builds a pod that listens on port 8080. - // Use regular user to create pod, service and httproute. - echoPod := buildEchoPod(backendRefname, namespace) - _, echoPodErr := oc.KubeClient().CoreV1().Pods(namespace).Create(context.TODO(), echoPod, metav1.CreateOptions{}) - o.Expect(echoPodErr).NotTo(o.HaveOccurred()) + // Create the backend (deployment and service) needed for the route to have resolvedRefs=true. + // The httproute, service, and deployment are cleaned up when the namespace is automatically deleted. + // buildEchoDeployment builds a deployment that listens on port 8080. + // Use admin client to create deployment, service and httproute. + echoDeployment := buildEchoDeployment(backendRefname, namespace) + _, deployErr := oc.AdminKubeClient().AppsV1().Deployments(namespace).Create(context.TODO(), echoDeployment, metav1.CreateOptions{}) + o.Expect(deployErr).NotTo(o.HaveOccurred()) + + // Wait for deployment to be ready + deployReadyErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 2*time.Minute, false, func(context context.Context) (bool, error) { + dep, err := oc.AdminKubeClient().AppsV1().Deployments(namespace).Get(context, backendRefname, metav1.GetOptions{}) + if err != nil { + return false, nil + } + if dep.Status.ReadyReplicas > 0 { + e2e.Logf("Deployment %s is ready with %d replicas", backendRefname, dep.Status.ReadyReplicas) + return true, nil + } + e2e.Logf("Waiting for deployment %s to be ready...", backendRefname) + return false, nil + }) + o.Expect(deployReadyErr).NotTo(o.HaveOccurred(), "Deployment %s never became ready", backendRefname) // buildEchoService builds a service that targets port 8080. - echoService := buildEchoService(echoPod.Name, namespace, echoPod.ObjectMeta.Labels) - _, echoServiceErr := oc.KubeClient().CoreV1().Services(namespace).Create(context.Background(), echoService, metav1.CreateOptions{}) + echoService := buildEchoService(backendRefname, namespace, echoDeployment.Spec.Template.ObjectMeta.Labels) + _, echoServiceErr := oc.AdminKubeClient().CoreV1().Services(namespace).Create(context.Background(), echoService, metav1.CreateOptions{}) o.Expect(echoServiceErr).NotTo(o.HaveOccurred()) // Create the HTTPRoute @@ -968,6 +904,27 @@ func buildEchoPod(name, namespace string) *corev1.Pod { } } +// buildEchoDeployment returns a deployment for an socat-based echo server. +func buildEchoDeployment(name, namespace string) *appsv1.Deployment { + pod := buildEchoPod(name, namespace) + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: pointer.Int32(1), + Selector: &metav1.LabelSelector{ + MatchLabels: pod.ObjectMeta.Labels, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: pod.ObjectMeta, + Spec: pod.Spec, + }, + }, + } +} + // buildEchoService returns a service definition for an HTTP service. func buildEchoService(name, namespace string, labels map[string]string) *corev1.Service { return &corev1.Service{ @@ -1361,17 +1318,140 @@ func waitForIstiodPodDeletion(oc *exutil.CLI) { }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(o.Succeed()) } -func checkIstiodExists(oc *exutil.CLI, namespace string, name string) error { - waitErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 5*time.Minute, false, func(context context.Context) (bool, error) { - istiod, err := oc.AdminKubeClient().AppsV1().Deployments(namespace).Get(context, name, metav1.GetOptions{}) +// validateOLMBasedOSSM validates that Gateway API is using OLM-based provisioning. +func validateOLMBasedOSSM(oc *exutil.CLI, timeout time.Duration) { + pollInterval := 5 * time.Second + + //check the catalogSource + g.By("Check OLM catalogSource, subscription, CSV and Deployment") + waitCatalogErr := wait.PollUntilContextTimeout(context.Background(), pollInterval, timeout, false, func(context context.Context) (bool, error) { + catalog, err := oc.AsAdmin().Run("get").Args("-n", "openshift-marketplace", "catalogsource", expectedSubscriptionSource, "-o=jsonpath={.status.connectionState.lastObservedState}").Output() + if err != nil { + e2e.Logf("Failed to get CatalogSource %q: %v; retrying...", expectedSubscriptionSource, err) + return false, nil + } + if catalog != "READY" { + e2e.Logf("CatalogSource %q is not in ready state, retrying...", expectedSubscriptionSource) + return false, nil + } + e2e.Logf("CatalogSource %q is ready!", expectedSubscriptionSource) + return true, nil + }) + o.Expect(waitCatalogErr).NotTo(o.HaveOccurred(), "Timed out waiting for CatalogSource %q to become ready", expectedSubscriptionSource) + + // check Subscription + var csvName string + var err error + waitVersionErr := wait.PollUntilContextTimeout(context.Background(), pollInterval, timeout, false, func(context context.Context) (bool, error) { + csvName, err = oc.AsAdmin().Run("get").Args("-n", expectedSubscriptionNamespace, "subscription", expectedSubscriptionName, "-o=jsonpath={.status.installedCSV}").Output() + if err != nil { + e2e.Logf("Failed to get Subscription %q: %v; retrying...", expectedSubscriptionName, err) + return false, nil + } + if csvName == "" { + e2e.Logf("Subscription %q doesn't have installed CSV, retrying...", expectedSubscriptionName) + return false, nil + } + e2e.Logf("Subscription %q has installed CSV: %s", expectedSubscriptionName, csvName) + return true, nil + }) + o.Expect(waitVersionErr).NotTo(o.HaveOccurred(), "Timed out waiting for the ClusterServiceVersion to install") + + waitCSVErr := wait.PollUntilContextTimeout(context.Background(), pollInterval, timeout, false, func(context context.Context) (bool, error) { + csvStatus, err := oc.AsAdmin().Run("get").Args("-n", expectedSubscriptionNamespace, "clusterserviceversion", csvName, "-o=jsonpath={.status.phase}").Output() + if err != nil { + e2e.Logf("Failed to get ClusterServiceVersion %q: %v; retrying...", csvName, err) + return false, nil + } + if csvStatus != "Succeeded" { + e2e.Logf("Cluster Service Version %q is not successful, retrying...", csvName) + return false, nil + } + e2e.Logf("Cluster Service Version %q has succeeded!", csvName) + return true, nil + }) + o.Expect(waitCSVErr).NotTo(o.HaveOccurred(), "Cluster Service Version %s never reached succeeded status", csvName) + + // get OSSM Operator deployment + waitErr := wait.PollUntilContextTimeout(context.Background(), pollInterval, timeout, false, func(context context.Context) (bool, error) { + deployOSSM, err := oc.AdminKubeClient().AppsV1().Deployments(expectedSubscriptionNamespace).Get(context, deploymentOSSMName, metav1.GetOptions{}) if err != nil { - e2e.Logf("Failed to get istiod deployment %q: %v; retrying...", name, err) + e2e.Logf("Failed to get OSSM operator deployment %q: %v; retrying...", deploymentOSSMName, err) + return false, nil + } + if deployOSSM.Status.ReadyReplicas < 1 { + e2e.Logf("OSSM operator deployment %q is not ready, retrying...", deploymentOSSMName) + return false, nil + } + e2e.Logf("OSSM operator deployment %q is ready", deploymentOSSMName) + return true, nil + }) + o.Expect(waitErr).NotTo(o.HaveOccurred(), "OSSM Operator deployment %q did not successfully deploy its pod", deploymentOSSMName) + + g.By("Confirm that Istio CR is created and in healthy state") + waitForIstioHealthy(oc, timeout) + + g.By("Verifying Istiod control plane is running") + err = checkIstiodRunning(oc, timeout) + o.Expect(err).NotTo(o.HaveOccurred()) + + g.By("Verifying Istio CRDs are managed by OLM") + err = assertIstioCRDsOwnedByOLM(oc) + o.Expect(err).NotTo(o.HaveOccurred(), "Istio CRDs should be OLM-managed") +} + +func checkIstiodExists(oc *exutil.CLI, timeout time.Duration) (*appsv1.Deployment, error) { + var deployment *appsv1.Deployment + waitErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, timeout, false, func(context context.Context) (bool, error) { + istiod, err := oc.AdminKubeClient().AppsV1().Deployments(ingressNamespace).Get(context, istiodDeployment, metav1.GetOptions{}) + if err != nil { + e2e.Logf("Failed to get istiod deployment %q: %v; retrying...", istiodDeployment, err) return false, nil } e2e.Logf("Successfully found the istiod Deployment: %s", istiod) + deployment = istiod return true, nil }) - o.Expect(waitErr).NotTo(o.HaveOccurred(), "Timed out looking for the deployment %q", name) + if waitErr != nil { + return nil, fmt.Errorf("timed out looking for the deployment %q: %w", istiodDeployment, waitErr) + } + return deployment, nil +} + +// checkIstiodRunning verifies that at least one istiod pod is ready +func checkIstiodRunning(oc *exutil.CLI, timeout time.Duration) error { + ctx := context.Background() + + // Wait for Istiod deployment to exist + deployment, err := checkIstiodExists(oc, timeout) + if err != nil { + return err + } + + // Verify at least one istiod pod is ready + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, timeout, false, func(ctx context.Context) (bool, error) { + pods, err := oc.AdminKubeClient().CoreV1().Pods(ingressNamespace).List(ctx, metav1.ListOptions{ + LabelSelector: metav1.FormatLabelSelector(deployment.Spec.Selector), + }) + if err != nil { + return false, nil + } + if len(pods.Items) < 1 { + e2e.Logf("Waiting for istiod pods to be created...") + return false, nil + } + for _, pod := range pods.Items { + if podConditionStatus(&pod, corev1.PodReady) == corev1.ConditionTrue { + return true, nil + } + } + e2e.Logf("Waiting for at least one istiod pod to be ready...") + return false, nil + }) + if err != nil { + return fmt.Errorf("at least one istiod pod should be ready: %w", err) + } + return nil } @@ -1435,7 +1515,8 @@ func checkGatewayClassFinalizer(oc *exutil.CLI, name string, expectedFinalizer s return nil } -func assertIstioCRDsOwnedByCIO(oc *exutil.CLI) error { +// assertIstioCRDsHaveLabel verifies that all Istio CRDs have the specified label with expected value +func assertIstioCRDsHaveLabel(oc *exutil.CLI, labelKey string, expectedValue string, owner string) error { crdList, err := oc.AdminApiextensionsClient().ApiextensionsV1().CustomResourceDefinitions().List(context.Background(), metav1.ListOptions{}) if err != nil { return fmt.Errorf("failed to list CRDs: %w", err) @@ -1445,16 +1526,47 @@ func assertIstioCRDsOwnedByCIO(oc *exutil.CLI) error { for _, crd := range crdList.Items { if strings.HasSuffix(crd.Name, "istio.io") { istioFound = true - if value, ok := crd.Labels["ingress.operator.openshift.io/owned"]; ok && value == "true" { - e2e.Logf("CRD %s has the specific label value: %s", crd.Name, value) + if value, ok := crd.Labels[labelKey]; ok && value == expectedValue { + e2e.Logf("CRD %s has label %s=%s", crd.Name, labelKey, value) continue } - return fmt.Errorf("CRD %s is not managed by CIO", crd.Name) + return fmt.Errorf("CRD %s is not managed by %s (expected label %s=%s)", crd.Name, owner, labelKey, expectedValue) } } if !istioFound { - return fmt.Errorf("There are no istio.io CRDs found") + return fmt.Errorf("no istio.io CRDs found") + } + return nil +} + +func assertIstioCRDsOwnedByCIO(oc *exutil.CLI) error { + return assertIstioCRDsHaveLabel(oc, "ingress.operator.openshift.io/owned", "true", "CIO") +} + +func assertIstioCRDsOwnedByOLM(oc *exutil.CLI) error { + return assertIstioCRDsHaveLabel(oc, "olm.managed", "true", "OLM") +} + +// checkIstiodManagedBySailLibrary verifies that the istiod deployment has the managed-by: sail-library label +func checkIstiodManagedBySailLibrary(oc *exutil.CLI) error { + waitErr := wait.PollUntilContextTimeout(context.Background(), 1*time.Second, 5*time.Minute, false, func(context context.Context) (bool, error) { + istiod, err := oc.AdminKubeClient().AppsV1().Deployments(ingressNamespace).Get(context, istiodDeployment, metav1.GetOptions{}) + if err != nil { + e2e.Logf("Failed to get istiod deployment %q: %v; retrying...", istiodDeployment, err) + return false, nil + } + labels := istiod.ObjectMeta.Labels + e2e.Logf("istiod deployment labels: %s", labels) + if value, ok := labels["managed-by"]; ok && value == "sail-library" { + e2e.Logf("The istiod deployment %q is managed by the sail library", istiodDeployment) + return true, nil + } + e2e.Logf("The istiod deployment %q does not have managed-by=sail-library label, retrying...", istiodDeployment) + return false, nil + }) + if waitErr != nil { + return fmt.Errorf("timed out waiting for istiod deployment to have managed-by=sail-library label: %w", waitErr) } return nil }