diff --git a/test/e2e/upgrade/upgrade.go b/test/e2e/upgrade/upgrade.go index 7f1a4b2d452e..86f7032f1634 100644 --- a/test/e2e/upgrade/upgrade.go +++ b/test/e2e/upgrade/upgrade.go @@ -332,89 +332,109 @@ func clusterUpgrade(f *framework.Framework, c configv1client.Interface, dc dynam go monitor.Disrupt(ctx, kubeClient, upgradeDisruptRebootPolicy) // observe the upgrade, taking action as necessary - framework.Logf("Cluster version operator acknowledged upgrade request") - aborted := false - var lastMessage string - upgradeStarted := time.Now() - if err := wait.PollImmediate(10*time.Second, maximumDuration, func() (bool, error) { - cv, msg, err := monitor.Check(updated.Generation, desired) - if msg != "" { - lastMessage = msg - } - if err != nil || cv == nil { - return false, err - } - - if !aborted && monitor.ShouldUpgradeAbort(abortAt) { - framework.Logf("Instructing the cluster to return to %s / %s", original.Status.Desired.Version, original.Status.Desired.Image) - desired = configv1.Update{ - Image: original.Status.Desired.Image, - Force: true, - } - if err := retry.RetryOnConflict(wait.Backoff{Steps: 10, Duration: time.Second}, func() error { - cv, err := c.ConfigV1().ClusterVersions().Get(context.Background(), "version", metav1.GetOptions{}) - if err != nil { - return err + if err := disruption.RecordJUnit( + f, + "[sig-cluster-lifecycle] Cluster completes upgrade", + func() error { + framework.Logf("Cluster version operator acknowledged upgrade request") + aborted := false + var lastMessage string + upgradeStarted := time.Now() + + if err := wait.PollImmediate(10*time.Second, maximumDuration, func() (bool, error) { + cv, msg, err := monitor.Check(updated.Generation, desired) + if msg != "" { + lastMessage = msg + } + if err != nil || cv == nil { + return false, err } - cv.Spec.DesiredUpdate = &desired - cv, err = c.ConfigV1().ClusterVersions().Update(context.Background(), cv, metav1.UpdateOptions{}) - if err == nil { - updated = cv + + if !aborted && monitor.ShouldUpgradeAbort(abortAt) { + framework.Logf("Instructing the cluster to return to %s / %s", original.Status.Desired.Version, original.Status.Desired.Image) + desired = configv1.Update{ + Image: original.Status.Desired.Image, + Force: true, + } + if err := retry.RetryOnConflict(wait.Backoff{Steps: 10, Duration: time.Second}, func() error { + cv, err := c.ConfigV1().ClusterVersions().Get(context.Background(), "version", metav1.GetOptions{}) + if err != nil { + return err + } + cv.Spec.DesiredUpdate = &desired + cv, err = c.ConfigV1().ClusterVersions().Update(context.Background(), cv, metav1.UpdateOptions{}) + if err == nil { + updated = cv + } + return err + }); err != nil { + return false, err + } + aborted = true + return false, nil } - return err + + return monitor.Reached(cv, desired) + }); err != nil { - return false, err + if lastMessage != "" { + return fmt.Errorf("Cluster did not complete upgrade: %v: %s", err, lastMessage) + } + return fmt.Errorf("Cluster did not complete upgrade: %v", err) } - aborted = true - return false, nil - } - - return monitor.Reached(cv, desired) - }); err != nil { - if lastMessage != "" { - return fmt.Errorf("Cluster did not complete upgrade: %v: %s", err, lastMessage) - } - return fmt.Errorf("Cluster did not complete upgrade: %v", err) - } + framework.Logf("Completed upgrade to %s", versionString(desired)) - framework.Logf("Completed upgrade to %s", versionString(desired)) + // record whether the cluster was fast or slow upgrading. Don't fail the test, we still want signal on the actual tests themselves. + upgradeEnded := time.Now() + upgradeDuration := upgradeEnded.Sub(upgradeStarted) + if upgradeDuration > durationToSoftFailure { + disruption.RecordJUnitResult(f, "[sig-cluster-lifecycle] cluster upgrade should be fast", upgradeDuration, fmt.Sprintf("Upgrade took too long: %v", upgradeDuration.Minutes())) + } else { + disruption.RecordJUnitResult(f, "[sig-cluster-lifecycle] cluster upgrade should be fast", upgradeDuration, "") + } - // record whether the cluster was fast or slow upgrading. Don't fail the test, we still want signal on the actual tests themselves. - upgradeEnded := time.Now() - upgradeDuration := upgradeEnded.Sub(upgradeStarted) - if upgradeDuration > durationToSoftFailure { - disruption.RecordJUnitResult(f, "cluster upgrade should be fast", upgradeDuration, fmt.Sprintf("Upgrade took too long: %v", upgradeDuration.Minutes())) - } else { - disruption.RecordJUnitResult(f, "cluster upgrade should be fast", upgradeDuration, "") + return nil + }, + ); err != nil { + return err } - framework.Logf("Waiting on pools to be upgraded") - if err := wait.PollImmediate(10*time.Second, 30*time.Minute, func() (bool, error) { - mcps := dc.Resource(schema.GroupVersionResource{ - Group: "machineconfiguration.openshift.io", - Version: "v1", - Resource: "machineconfigpools", - }) - pools, err := mcps.List(context.Background(), metav1.ListOptions{}) - if err != nil { - framework.Logf("error getting pools %v", err) - return false, nil - } - allUpdated := true - for _, p := range pools.Items { - updated, err := IsPoolUpdated(mcps, p.GetName()) - if err != nil { - framework.Logf("error checking pool %s: %v", p.GetName(), err) - return false, nil + if err := disruption.RecordJUnit( + f, + "[sig-mco] Machine config pools complete upgrade", + func() error { + framework.Logf("Waiting on pools to be upgraded") + if err := wait.PollImmediate(10*time.Second, 30*time.Minute, func() (bool, error) { + mcps := dc.Resource(schema.GroupVersionResource{ + Group: "machineconfiguration.openshift.io", + Version: "v1", + Resource: "machineconfigpools", + }) + pools, err := mcps.List(context.Background(), metav1.ListOptions{}) + if err != nil { + framework.Logf("error getting pools %v", err) + return false, nil + } + allUpdated := true + for _, p := range pools.Items { + updated, err := IsPoolUpdated(mcps, p.GetName()) + if err != nil { + framework.Logf("error checking pool %s: %v", p.GetName(), err) + return false, nil + } + allUpdated = allUpdated && updated + } + return allUpdated, nil + }); err != nil { + return fmt.Errorf("Pools did not complete upgrade: %v", err) } - allUpdated = allUpdated && updated - } - return allUpdated, nil - }); err != nil { - return fmt.Errorf("Pools did not complete upgrade: %v", err) + framework.Logf("All pools completed upgrade") + return nil + }, + ); err != nil { + return err } - framework.Logf("All pools completed upgrade") return nil }