+		ocAdmUpgradeStatus: make([]snapshot, 0, 60), // expect about an hour of minutely snapshots
metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("unable to get cluster version: %w", err) + } + + w.initialHistoryItems = len(cv.Status.History) + return nil } @@ -83,7 +107,7 @@ func snapshotOcAdmUpgradeStatus(ch chan *snapshot) { var err error // retry on brief apiserver unavailability if errWait := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 2*time.Minute, true, func(context.Context) (bool, error) { - cmd := oc.Run("adm", "upgrade", "status").EnvVar("OC_ENABLE_CMD_UPGRADE_STATUS", "true") + cmd := oc.Run("adm", "upgrade", "status", "--details=all").EnvVar("OC_ENABLE_CMD_UPGRADE_STATUS", "true") out, err = cmd.Output() if err != nil { return false, nil @@ -106,7 +130,7 @@ func (w *monitor) StartCollection(ctx context.Context, adminRESTConfig *rest.Con go func() { for snap := range snapshots { // TODO: Maybe also collect some cluster resources (CV? COs?) through recorder? - w.ocAdmUpgradeStatus[snap.when] = snap + w.ocAdmUpgradeStatus = append(w.ocAdmUpgradeStatus, *snap) } w.collectionDone <- struct{}{} }() @@ -133,16 +157,67 @@ func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning, // the collection goroutines spawned in StartedCollection to finish <-w.collectionDone + sort.Slice(w.ocAdmUpgradeStatus, func(i, j int) bool { + return w.ocAdmUpgradeStatus[i].when.Before(w.ocAdmUpgradeStatus[j].when) + }) + + // TODO: Maybe utilize Intervals somehow and do tests in ComputeComputedIntervals and EvaluateTestsFromConstructedIntervals + + testCases := []*junitapi.JUnitTestCase{ + w.noFailures(), + w.expectedLayout(), + w.controlPlane(), + w.workers(), + w.health(), + w.updateLifecycle(ctx), + } + + return nil, testCases, nil +} + +func (w *monitor) ConstructComputedIntervals(context.Context, monitorapi.Intervals, monitorapi.ResourcesMap, time.Time, time.Time) (monitorapi.Intervals, error) { + return nil, w.notSupportedReason +} + +func (w *monitor) EvaluateTestsFromConstructedIntervals(_ context.Context, _ 
monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) { + if w.notSupportedReason != nil { + return nil, w.notSupportedReason + } + return nil, nil +} + +func (w *monitor) WriteContentToStorage(_ context.Context, storageDir, timeSuffix string, _ monitorapi.Intervals, _ monitorapi.ResourcesMap) error { + folderPath := path.Join(storageDir, "adm-upgrade-status") + if err := os.MkdirAll(folderPath, os.ModePerm); err != nil { + return fmt.Errorf("unable to create directory %s: %w", folderPath, err) + } + + var errs []error + for _, observed := range w.ocAdmUpgradeStatus { + outputFilename := fmt.Sprintf("adm-upgrade-status-%s_%s.txt", observed.when, timeSuffix) + outputFile := filepath.Join(folderPath, outputFilename) + if err := os.WriteFile(outputFile, []byte(observed.out), 0644); err != nil { + errs = append(errs, fmt.Errorf("failed to write %s: %w", outputFile, err)) + } + } + return errors.NewAggregate(errs) +} + +func (*monitor) Cleanup(ctx context.Context) error { + return nil +} + +func (w *monitor) noFailures() *junitapi.JUnitTestCase { noFailures := &junitapi.JUnitTestCase{ - Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc amd upgrade status never fails", + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status never fails", } var failures []string var total int - for when, observed := range w.ocAdmUpgradeStatus { + for _, snap := range w.ocAdmUpgradeStatus { total++ - if observed.err != nil { - failures = append(failures, fmt.Sprintf("- %s: %v", when.Format(time.RFC3339), observed.err)) + if snap.err != nil { + failures = append(failures, fmt.Sprintf("- %s: %v", snap.when.Format(time.RFC3339), snap.err)) } } @@ -154,40 +229,458 @@ func (w *monitor) CollectData(ctx context.Context, storageDir string, beginning, Output: strings.Join(failures, "\n"), } } + return noFailures +} + +func (w *monitor) expectedLayout() *junitapi.JUnitTestCase { + expectedLayout := &junitapi.JUnitTestCase{ + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm 
+			failureOutputBuilder.WriteString(fmt.Sprintf("- %s: unexpected empty output\n", observed.when.Format(time.RFC3339)))
regexp.MustCompile(`^\S+\s+\S+\s+\S+\s+.*$`)
+		if cp.Summary == nil {
+			fail("Control plane is not updated but summary section is not present")
+		}
+
+		for _, key := range []string{"Assessment", "Target Version", "Completion", "Duration", "Operator Health"} {
+			value, ok := cp.Summary[key]
+			if !ok {
+				fail(fmt.Sprintf("Control plane summary does not contain %s", key))
+			}
+			if value == "" {
+				fail(fmt.Sprintf("%s is empty", key))
+			}
+		}
+
+		updatingOperators, ok := cp.Summary["Updating"]
+		if !ok {
+			if cp.Operators != nil {
+				fail("Control plane summary does not contain Updating key but operators section is present")
+				continue
+			}
+		} else {
+			if updatingOperators == "" {
+				fail("Control plane summary contains Updating key but it is empty")
+				continue
+			}
+
+			if cp.Operators == nil {
+				fail("Control plane summary contains Updating key but operators section is not present")
+				continue
+			}
+
+			items := len(strings.Split(updatingOperators, ","))
+
+			if len(cp.Operators) != items {
+				fail(fmt.Sprintf("Control plane summary contains Updating key with %d operators but operators section has %d items", items, len(cp.Operators)))
+				continue
+			}
+		}
!operatorLinePattern.MatchString(operator) { + fail(fmt.Sprintf("Bad line in operators: %s", operator)) + } + } + + for _, node := range cp.Nodes { + if !nodeLinePattern.MatchString(node) { + fail(fmt.Sprintf("Bad line in nodes: %s", node)) + } + } + } + + if failureOutputBuilder.Len() > 0 { + controlPlane.FailureOutput = &junitapi.FailureOutput{ + Message: fmt.Sprintf("observed unexpected outputs in oc adm upgrade status control plane section"), + Output: failureOutputBuilder.String(), + } + } + + return controlPlane } -func (w *monitor) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) { - if w.notSupportedReason != nil { - return nil, w.notSupportedReason +func (w *monitor) workers() *junitapi.JUnitTestCase { + workers := &junitapi.JUnitTestCase{ + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status workers section is consistent", + SkipMessage: &junitapi.SkipMessage{ + Message: "Test skipped because no oc adm upgrade status output was successfully collected", + }, } - return nil, nil + + failureOutputBuilder := strings.Builder{} + + for _, observed := range w.ocAdmUpgradeStatusOutputModels { + if observed.output == nil { + // Failing to parse the output is handled in expectedLayout, so we can skip here + continue + } + // We saw at least one successful execution of oc adm upgrade status, so we have data to process + workers.SkipMessage = nil + + wroteOnce := false + fail := func(message string) { + if !wroteOnce { + wroteOnce = true + failureOutputBuilder.WriteString(fmt.Sprintf("\n===== %s\n", observed.when.Format(time.RFC3339))) + failureOutputBuilder.WriteString(observed.output.rawOutput) + failureOutputBuilder.WriteString(fmt.Sprintf("=> %s\n", message)) + } + } + + if !observed.output.updating { + // If the cluster is not updating, workers should not be updating + if observed.output.workers != nil { + fail("Cluster is not updating but workers section is 
present") + } + continue + } + + ws := observed.output.workers + if ws == nil { + // We do not show workers in SNO / compact clusters + // TODO: Crosscheck with topology + continue + } + + for _, pool := range ws.Pools { + if emptyPoolLinePattern.MatchString(pool) { + name := strings.Split(pool, " ")[0] + _, ok := ws.Nodes[name] + if ok { + fail(fmt.Sprintf("Empty nodes table should not be shown for an empty pool %s", name)) + } + continue + } + if !poolLinePattern.MatchString(pool) { + fail(fmt.Sprintf("Bad line in Worker Pool table: %s", pool)) + } + } + + if len(ws.Nodes) > len(ws.Pools) { + fail("Showing more Worker Pool Nodes tables than lines in Worker Pool table") + } + + for name, nodes := range ws.Nodes { + if len(nodes) == 0 { + fail(fmt.Sprintf("Worker Pool Nodes table for %s is empty", name)) + continue + } + + for _, node := range nodes { + if !nodeLinePattern.MatchString(node) { + fail(fmt.Sprintf("Bad line in Worker Pool Nodes table for %s: %s", name, node)) + } + } + } + } + + if failureOutputBuilder.Len() > 0 { + workers.FailureOutput = &junitapi.FailureOutput{ + Message: fmt.Sprintf("observed unexpected outputs in oc adm upgrade status workers section"), + Output: failureOutputBuilder.String(), + } + } + + return workers } -func (w *monitor) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error { - folderPath := path.Join(storageDir, "adm-upgrade-status") - if err := os.MkdirAll(folderPath, os.ModePerm); err != nil { - return fmt.Errorf("unable to create directory %s: %w", folderPath, err) +func (w *monitor) health() *junitapi.JUnitTestCase { + health := &junitapi.JUnitTestCase{ + Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status health section is consistent", + SkipMessage: &junitapi.SkipMessage{ + Message: "Test skipped because no oc adm upgrade status output was successfully collected", + }, } - var errs []error - for 
+		if !observed.output.updating {
+			// If the cluster is not updating, the health section should not be present
+			if observed.output.health != nil {
+		Name: "[sig-cli][OCPFeatureGate:UpgradeStatus] oc adm upgrade status output matches the lifecycle of the update",
+		o := notUpdating
+		switch {
+		case observed.output.controlPlane != nil && observed.output.controlPlane.Updated:
+			o = controlPlaneObservedUpdated
+		case observed.output.controlPlane != nil && observed.output.controlPlane.NodesUpdated:
+			o = controlPlaneObservedNodesUpdated
+		case observed.output.updating:
+			o = controlPlaneObservedUpdating
+		}
+1,505 @@ +package admupgradestatus + +import ( + "errors" + "fmt" + "regexp" + "strings" +) + +type ControlPlaneStatus struct { + Updated bool + Summary map[string]string + Operators []string + NodesUpdated bool + Nodes []string +} + +type WorkersStatus struct { + Pools []string + Nodes map[string][]string +} + +type Health struct { + Detailed bool + Messages []string +} + +type upgradeStatusOutput struct { + rawOutput string + updating bool + controlPlane *ControlPlaneStatus + workers *WorkersStatus + health *Health +} + +var unableToFetchAlerts = regexp.MustCompile(`^Unable to fetch alerts.*`) + +func newUpgradeStatusOutput(output string) (*upgradeStatusOutput, error) { + output = strings.TrimSpace(output) + + if output == "The cluster is not updating." { + return &upgradeStatusOutput{ + rawOutput: output, + updating: false, + controlPlane: nil, + workers: nil, + }, nil + } + + lines := strings.Split(output, "\n") + parser := &parser{lines: lines, pos: 0} + + if parser.tryRegex(unableToFetchAlerts) { + parser.eatRegex(unableToFetchAlerts) + } + + controlPlane, err := parser.parseControlPlaneSection() + if err != nil { + return nil, err + } + + workers, err := parser.parseWorkerUpgradeSection() + if err != nil { + return nil, err + } + + health, err := parser.parseHealthSection() + if err != nil { + return nil, err + } + + return &upgradeStatusOutput{ + rawOutput: output, + updating: true, + controlPlane: controlPlane, + workers: workers, + health: health, + }, nil +} + +type parser struct { + lines []string + pos int +} + +var ( + updatingOperatorsHeader = regexp.MustCompile(`^NAME\s+SINCE\s+REASON\s+MESSAGE$`) + nodesHeader = regexp.MustCompile(`^NAME\s+ASSESSMENT\s+PHASE\s+VERSION\s+EST\s+MESSAGE$`) + workerPoolsHeader = regexp.MustCompile(`^WORKER POOL\s+ASSESSMENT\s+COMPLETION\s+STATUS$`) + healthHeader = regexp.MustCompile(`^SINCE\s+LEVEL\s+IMPACT\s+MESSAGE$`) + + workerUpgradeHeader = regexp.MustCompile(`^= Worker Upgrade =$`) + controlPlaneUpdated = 
regexp.MustCompile(`^Update to .* successfully completed at .*$`) + controlPlaneNodesUpdated = regexp.MustCompile(`^All control plane nodes successfully updated to .*`) +) + +type nextOption int + +const ( + preserveLeadingWhitespace nextOption = iota +) + +func (p *parser) next(opts ...nextOption) (string, bool) { + if p.pos >= len(p.lines) { + return "", true + } + + line := p.lines[p.pos] + p.pos++ + + // Check if we should preserve leading whitespace + preserveLeading := false + for _, opt := range opts { + if opt == preserveLeadingWhitespace { + preserveLeading = true + break + } + } + + if preserveLeading { + return strings.TrimRight(line, " \t\r\n"), false + } else { + return strings.TrimSpace(line), false + } +} + +func (p *parser) eatEmptyLines() { + for { + line, done := p.next() + if done { + return + } + if line != "" { + p.pos-- + return + } + } +} + +func (p *parser) tryRegex(what *regexp.Regexp) bool { + line, done := p.next() + p.pos-- + + return !done && what.MatchString(line) +} + +func (p *parser) eat(what string) error { + line, done := p.next() + if done { + return fmt.Errorf("expected '%s' but reached end of input", what) + } + + if line != what { + return fmt.Errorf("expected '%s' but got '%s'", what, line) + } + + return nil +} + +func (p *parser) eatRegex(what *regexp.Regexp) error { + line, done := p.next() + if done { + return fmt.Errorf("expected '%s' but reached end of input", what) + } + + if !what.MatchString(line) { + return fmt.Errorf("expected '%s' but got '%s'", what, line) + } + + return nil +} + +func (p *parser) parseControlPlaneSection() (*ControlPlaneStatus, error) { + if err := p.eat("= Control Plane ="); err != nil { + return nil, err + } + + var status ControlPlaneStatus + + if p.tryRegex(controlPlaneUpdated) { + _ = p.eatRegex(controlPlaneUpdated) + status.Updated = true + p.eatEmptyLines() + if err := p.eatRegex(controlPlaneNodesUpdated); err != nil { + return nil, fmt.Errorf("expected 'All control plane nodes 
successfully updated to' message, got: %w", err) + } + status.NodesUpdated = true + + return &status, nil + } + + summary, err := p.parseControlPlaneSummary() + if err != nil { + return nil, err + } + status.Summary = summary + + operators, err := p.parseControlPlaneOperators() + if err != nil { + return nil, err + } + status.Operators = operators + + p.eatEmptyLines() + + if p.tryRegex(controlPlaneNodesUpdated) { + _ = p.eatRegex(controlPlaneNodesUpdated) + status.NodesUpdated = true + } else { + nodes, err := p.parseControlPlaneNodes() + if err != nil { + return nil, err + } + status.Nodes = nodes + } + + return &status, nil +} + +func (p *parser) parseControlPlaneSummary() (map[string]string, error) { + p.eatEmptyLines() + + summary := map[string]string{} + for { + line, done := p.next() + if done || line == "" { + break + } + + parts := strings.SplitN(line, ":", 2) + if len(parts) != 2 { + return nil, fmt.Errorf("expected 'Key: Value' format, got: %s", line) + } + + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + summary[key] = value + } + + if len(summary) == 0 { + return nil, errors.New("found no entries in control plane summary section") + } + + return summary, nil +} + +func (p *parser) parseControlPlaneOperators() ([]string, error) { + p.eatEmptyLines() + + if line, _ := p.next(); line != "Updating Cluster Operators" { + // section is optional, put back the line and return nil + p.pos-- + return nil, nil + } + + if err := p.eatRegex(updatingOperatorsHeader); err != nil { + return nil, fmt.Errorf("expected Updating Cluster Operators table header, got: %w", err) + } + + var operators []string + + for { + line, done := p.next() + if done || line == "" { + break + } + + operators = append(operators, line) + } + + if len(operators) == 0 { + return nil, errors.New("found no entries in Updating Cluster Operators section") + } + + return operators, nil +} + +func (p *parser) parseControlPlaneNodes() ([]string, error) { + 
p.eatEmptyLines() + + if p.eat("Control Plane Nodes") != nil { + return nil, errors.New("expected 'Control Plane Nodes' section") + } + + if err := p.eatRegex(nodesHeader); err != nil { + return nil, fmt.Errorf("expected Control Plane Nodes table header: %w", err) + } + + var nodes []string + for { + line, done := p.next() + if done || line == "" { + break + } + + nodes = append(nodes, line) + } + + if len(nodes) == 0 { + return nil, errors.New("no nodes found in Control Plane Nodes section") + } + + return nodes, nil +} + +func (p *parser) parseWorkerUpgradeSection() (*WorkersStatus, error) { + p.eatEmptyLines() + + if !p.tryRegex(workerUpgradeHeader) { + return nil, nil + } + + if err := p.eat("= Worker Upgrade ="); err != nil { + return nil, err + } + + pools, err := p.parseWorkerPools() + if err != nil { + return nil, err + } + + nodes, err := p.parseWorkerPoolNodes() + if err != nil { + return nil, err + } + + return &WorkersStatus{ + Pools: pools, + Nodes: nodes, + }, nil +} + +func (p *parser) parseWorkerPools() ([]string, error) { + p.eatEmptyLines() + + if err := p.eatRegex(workerPoolsHeader); err != nil { + return nil, fmt.Errorf("expected Worker Upgrade table header: %w", err) + } + + var pools []string + for { + line, done := p.next() + if done || line == "" { + break + } + + pools = append(pools, line) + } + + if len(pools) == 0 { + return nil, errors.New("no worker pools found in Worker Upgrade section") + } + + return pools, nil +} + +func (p *parser) parseWorkerPoolNodes() (map[string][]string, error) { + nodes := make(map[string][]string) + + for { + p.eatEmptyLines() + + name, entries, err := p.tryParseWorkerNodeTable() + if err != nil { + return nil, err + } + + if name == "" { + break + } + + nodes[name] = entries + } + + if len(nodes) == 0 { + return nil, errors.New("no worker pool nodes found in Worker Upgrade section") + } + + return nodes, nil +} + +func (p *parser) tryParseWorkerNodeTable() (string, []string, error) { + p.eatEmptyLines() + 
+ line, done := p.next() + if done { + return "", nil, errors.New("expected 'Worker Pool Nodes:' section but reached end of input") + } + if !strings.HasPrefix(line, "Worker Pool Nodes:") { + p.pos-- // put it back + return "", nil, nil + } + + name := strings.TrimPrefix(line, "Worker Pool Nodes: ") + + if err := p.eatRegex(nodesHeader); err != nil { + return "", nil, fmt.Errorf("expected worker pool nodes table header for pool '%s': %w", name, err) + } + + // Read node entries + var nodeEntries []string + for { + line, done := p.next() + if done || line == "" { + break + } + + nodeEntries = append(nodeEntries, line) + } + + if len(nodeEntries) == 0 { + return "", nil, fmt.Errorf("no nodes found for worker pool '%s'", name) + } + + return name, nodeEntries, nil +} + +func (p *parser) parseHealthSection() (*Health, error) { + p.eatEmptyLines() + + if err := p.eat("= Update Health ="); err != nil { + return nil, err + } + + var health Health + + line, done := p.next() + if done { + return nil, errors.New("expected 'Update Health' section but reached end of input") + } + + var getMessage func() (string, error) + if strings.HasPrefix(line, "Message: ") { + getMessage = p.parseHealthMessage + health.Detailed = true + p.pos-- + } else if healthHeader.MatchString(line) { + getMessage = p.parseHealthMessageLine + } else { + return nil, fmt.Errorf("expected 'Update Health' to start with either a table header or a 'Message: ' line, got %s", line) + } + + for { + message, err := getMessage() + if err != nil { + return nil, err + } + + if message == "" { + // No more messages + break + } + + health.Messages = append(health.Messages, message) + } + + if len(health.Messages) == 0 { + return nil, errors.New("no health messages found in Update Health section") + } + + return &health, nil +} + +func (p *parser) parseHealthMessageLine() (string, error) { + line, _ := p.next() + return line, nil +} + +func (p *parser) parseHealthMessage() (string, error) { + var messageBuilder 
strings.Builder + + line, done := p.next() + if done { + return "", nil // No more input + } + + if !strings.HasPrefix(line, "Message: ") { + return "", fmt.Errorf("expected health message to start with 'Message: ', got: %s", line) + } + + messageBuilder.WriteString(line) + + // Read continuation lines until we hit the next "Message: " or end of input + for { + line, done := p.next(preserveLeadingWhitespace) + if done { + break + } + + if line == "" { + peek, done := p.next() + if done { + break + } + p.pos-- + if strings.HasPrefix(peek, "Message: ") { + break + } + } + + messageBuilder.WriteString("\n" + line) + } + + return strings.TrimSpace(messageBuilder.String()), nil +} diff --git a/pkg/monitortests/cli/adm_upgrade/status/outputmodel_test.go b/pkg/monitortests/cli/adm_upgrade/status/outputmodel_test.go new file mode 100644 index 000000000000..e1ed4c4c3569 --- /dev/null +++ b/pkg/monitortests/cli/adm_upgrade/status/outputmodel_test.go @@ -0,0 +1,339 @@ +package admupgradestatus + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestUpgradeStatusOutput_NotUpdating(t *testing.T) { + input := "The cluster is not updating." + + output, err := newUpgradeStatusOutput(input) + if err != nil { + t.Fatalf("Expected no error, got: %v", err) + } + + if output.updating { + t.Error("Expected IsUpdating() to return false for 'not updating' case") + } + + if output.controlPlane != nil { + t.Error("Expected nil ControlPlane() for 'not updating' case") + } + + if output.workers != nil { + t.Error("Expected nil Workers() for 'not updating' case") + } + + if output.health != nil { + t.Error("Expected nil Health() for 'not updating' case") + } +} + +func TestUpgradeStatusOutput_FullInput(t *testing.T) { + input := `= Control Plane = +Assessment: Stalled +Target Version: 4.14.1 (from 4.14.0-rc.3) +Updating: machine-config +Completion: 97% (32 operators updated, 1 updating, 0 waiting) +Duration: 1h59m (Est. 
Time Remaining: N/A; estimate duration was 1h24m) +Operator Health: 28 Healthy, 1 Unavailable, 4 Available but degraded + +Updating Cluster Operators +NAME SINCE REASON MESSAGE +machine-config 1h4m41s - Working towards 4.14.1 + +Control Plane Nodes +NAME ASSESSMENT PHASE VERSION EST MESSAGE +ip-10-0-30-217.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? +ip-10-0-53-40.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? +ip-10-0-92-180.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? + += Worker Upgrade = + +WORKER POOL ASSESSMENT COMPLETION STATUS +worker Pending 0% (0/3) 3 Available, 0 Progressing, 0 Draining + +Worker Pool Nodes: worker +NAME ASSESSMENT PHASE VERSION EST MESSAGE +ip-10-0-20-162.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? +ip-10-0-4-159.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? +ip-10-0-99-40.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ? + += Update Health = +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-apiserver + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?) 
+ +Message: Cluster Operator kube-controller-manager is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-controller-manager + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?) + +Message: Cluster Operator kube-scheduler is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-scheduler + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?) 
+ +Message: Cluster Operator etcd is degraded (EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::NodeController_MasterNodesReady) + Since: 58m38s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: etcd + Description: EtcdEndpointsDegraded: EtcdEndpointsController can't evaluate whether quorum is safe: etcd cluster has quorum of 2 and 2 healthy members which is not fault tolerant: [{Member:ID:12895393557789359222 name:"ip-10-0-73-118.ec2.internal" peerURLs:"https://10.0.73.118:2380" clientURLs:"https://10.0.73.118:2379" Healthy:true Took:1.725492ms Error:} {Member:ID:13608765340770574953 name:"ip-10-0-0-60.ec2.internal" peerURLs:"https://10.0.0.60:2380" clientURLs:"https://10.0.0.60:2379" Healthy:true Took:1.542919ms Error:} {Member:ID:18044478200504924924 name:"ip-10-0-12-74.ec2.internal" peerURLs:"https://10.0.12.74:2380" clientURLs:"https://10.0.12.74:2379" Healthy:false Took: Error:create client failure: failed to make etcd client for endpoints [https://10.0.12.74:2379]: context deadline exceeded}] + , EtcdMembersDegraded: 2 of 3 members are available, ip-10-0-12-74.ec2.internal is unhealthy + , NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?) 
+ +Message: Cluster Operator control-plane-machine-set is unavailable (UnavailableReplicas) + Since: 1h0m17s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDown.md + Resources: + clusteroperators.config.openshift.io: control-plane-machine-set + Description: Missing 1 available replica(s) + +Message: Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded) + Since: now + Level: Warning + Impact: Update Stalled + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusterversions.config.openshift.io: version + Description: Cluster operators etcd, kube-apiserver are degraded` + + expectedControlPlaneSummary := map[string]string{ + "Assessment": "Stalled", + "Target Version": "4.14.1 (from 4.14.0-rc.3)", + "Updating": "machine-config", + "Completion": "97% (32 operators updated, 1 updating, 0 waiting)", + "Duration": "1h59m (Est. 
Time Remaining: N/A; estimate duration was 1h24m)", + "Operator Health": "28 Healthy, 1 Unavailable, 4 Available but degraded", + } + + expectedControlPlaneOperators := []string{ + "machine-config 1h4m41s - Working towards 4.14.1", + } + + expectedControlPlaneNodes := []string{ + "ip-10-0-30-217.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + "ip-10-0-53-40.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + "ip-10-0-92-180.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + } + + expectedWorkerPools := []string{ + "worker Pending 0% (0/3) 3 Available, 0 Progressing, 0 Draining", + } + + expectedWorkerNodes := map[string][]string{ + "worker": { + "ip-10-0-20-162.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + "ip-10-0-4-159.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + "ip-10-0-99-40.us-east-2.compute.internal Outdated Pending 4.14.0-rc.3 ?", + }, + } + + expectedHealth := &Health{ + Detailed: true, + Messages: []string{ + `Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-apiserver + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. 
Has your network provider started?)`, + `Message: Cluster Operator kube-controller-manager is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-controller-manager + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. Has your network provider started?)`, + `Message: Cluster Operator kube-scheduler is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-scheduler + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. 
Has your network provider started?)`, + `Message: Cluster Operator etcd is degraded (EtcdEndpoints_ErrorUpdatingEtcdEndpoints::EtcdMembers_UnhealthyMembers::NodeController_MasterNodesReady) + Since: 58m38s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: etcd + Description: EtcdEndpointsDegraded: EtcdEndpointsController can't evaluate whether quorum is safe: etcd cluster has quorum of 2 and 2 healthy members which is not fault tolerant: [{Member:ID:12895393557789359222 name:"ip-10-0-73-118.ec2.internal" peerURLs:"https://10.0.73.118:2380" clientURLs:"https://10.0.73.118:2379" Healthy:true Took:1.725492ms Error:} {Member:ID:13608765340770574953 name:"ip-10-0-0-60.ec2.internal" peerURLs:"https://10.0.0.60:2380" clientURLs:"https://10.0.0.60:2379" Healthy:true Took:1.542919ms Error:} {Member:ID:18044478200504924924 name:"ip-10-0-12-74.ec2.internal" peerURLs:"https://10.0.12.74:2380" clientURLs:"https://10.0.12.74:2379" Healthy:false Took: Error:create client failure: failed to make etcd client for endpoints [https://10.0.12.74:2379]: context deadline exceeded}] + , EtcdMembersDegraded: 2 of 3 members are available, ip-10-0-12-74.ec2.internal is unhealthy + , NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. 
Has your network provider started?)`, + `Message: Cluster Operator control-plane-machine-set is unavailable (UnavailableReplicas) + Since: 1h0m17s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDown.md + Resources: + clusteroperators.config.openshift.io: control-plane-machine-set + Description: Missing 1 available replica(s)`, + `Message: Cluster Version version is failing to proceed with the update (ClusterOperatorsDegraded) + Since: now + Level: Warning + Impact: Update Stalled + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusterversions.config.openshift.io: version + Description: Cluster operators etcd, kube-apiserver are degraded`, + }, + } + + output, err := newUpgradeStatusOutput(input) + if err != nil { + t.Fatalf("Expected no error, got: %v", err) + } + + if !output.updating { + t.Error("Expected IsUpdating() to return true for full input case") + } + + if output.controlPlane == nil { + t.Fatal("Expected ControlPlane() to return non-nil object") + } + + if diff := cmp.Diff(expectedControlPlaneSummary, output.controlPlane.Summary); diff != "" { + t.Errorf("ControlPlane summary mismatch (-expected +actual):\n%s", diff) + } + + if diff := cmp.Diff(expectedControlPlaneOperators, output.controlPlane.Operators); diff != "" { + t.Errorf("ControlPlane operators mismatch (-expected +actual):\n%s", diff) + } + + if diff := cmp.Diff(expectedControlPlaneNodes, output.controlPlane.Nodes); diff != "" { + t.Errorf("ControlPlane nodes mismatch (-expected +actual):\n%s", diff) + } + + if output.workers == nil { + t.Fatal("Expected Workers() to return non-nil object") + } + + if diff := cmp.Diff(expectedWorkerPools, output.workers.Pools); diff != "" { + t.Errorf("Worker pools mismatch (-expected +actual):\n%s", diff) + } + + if diff := cmp.Diff(expectedWorkerNodes, 
output.workers.Nodes); diff != "" { + t.Errorf("Worker nodes mismatch (-expected +actual):\n%s", diff) + } + + if diff := cmp.Diff(expectedHealth, output.health); diff != "" { + t.Errorf("Health messages mismatch (-expected +actual):\n%s", diff) + } +} + +func TestUpgradeStatusOutput_NoOperatorsSection(t *testing.T) { + input := `= Control Plane = +Assessment: Progressing +Target Version: 4.17.0-ec.0 (from 4.16.0-0.nightly-2024-08-01-082745) +Completion: 6% (2 operators updated, 1 updating, 30 waiting) +Duration: 2m54s (Est. Time Remaining: 1h9m) +Operator Health: 32 Healthy, 1 Available but degraded + +Control Plane Nodes +NAME ASSESSMENT PHASE VERSION EST MESSAGE +ip-10-0-8-37.ec2.internal Outdated Pending 4.16.0-0.nightly-2024-08-01-082745 ? + += Worker Upgrade = + +WORKER POOL ASSESSMENT COMPLETION STATUS +worker Complete 100% (3/3) 3 Available, 0 Progressing, 0 Draining + +Worker Pool Nodes: worker +NAME ASSESSMENT PHASE VERSION EST MESSAGE +ip-10-0-20-162.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 - +ip-10-0-4-159.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 - +ip-10-0-99-40.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 - + += Update Health = +Message: Cluster Operator kube-apiserver is degraded (NodeController_MasterNodesReady) + Since: 58m18s + Level: Error + Impact: API Availability + Reference: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + Resources: + clusteroperators.config.openshift.io: kube-apiserver + Description: NodeControllerDegraded: The master nodes not ready: node "ip-10-0-12-74.ec2.internal" not ready since 2023-11-03 16:28:43 +0000 UTC because KubeletNotReady (container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: No CNI configuration file in /etc/kubernetes/cni/net.d/. 
Has your network provider started?)` + + expectedControlPlaneSummary := map[string]string{ + "Assessment": "Progressing", + "Target Version": "4.17.0-ec.0 (from 4.16.0-0.nightly-2024-08-01-082745)", + "Completion": "6% (2 operators updated, 1 updating, 30 waiting)", + "Duration": "2m54s (Est. Time Remaining: 1h9m)", + "Operator Health": "32 Healthy, 1 Available but degraded", + } + + expectedControlPlaneNodes := []string{ + "ip-10-0-8-37.ec2.internal Outdated Pending 4.16.0-0.nightly-2024-08-01-082745 ?", + } + + output, err := newUpgradeStatusOutput(input) + if err != nil { + t.Fatalf("Expected no error, got: %v", err) + } + + if output.controlPlane == nil { + t.Fatal("Expected ControlPlane() to return non-nil object") + } + + if diff := cmp.Diff(expectedControlPlaneSummary, output.controlPlane.Summary); diff != "" { + t.Errorf("ControlPlane summary mismatch (-expected +actual):\n%s", diff) + } + + if output.controlPlane.Operators != nil { + t.Errorf("Expected Operators() to return nil when section is missing, got: %v", output.controlPlane.Operators) + } + + if diff := cmp.Diff(expectedControlPlaneNodes, output.controlPlane.Nodes); diff != "" { + t.Errorf("ControlPlane nodes mismatch (-expected +actual):\n%s", diff) + } + + if output.workers == nil { + t.Fatal("Expected Workers() to return non-nil object") + } + + expectedWorkerPools := []string{ + "worker Complete 100% (3/3) 3 Available, 0 Progressing, 0 Draining", + } + + if diff := cmp.Diff(expectedWorkerPools, output.workers.Pools); diff != "" { + t.Errorf("Worker pools mismatch (-expected +actual):\n%s", diff) + } + + expectedWorkerNodes := map[string][]string{ + "worker": { + "ip-10-0-20-162.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 -", + "ip-10-0-4-159.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 -", + "ip-10-0-99-40.us-east-2.compute.internal Completed Updated 4.17.0-ec.0 -", + }, + } + + if diff := cmp.Diff(expectedWorkerNodes, output.workers.Nodes); diff != "" { + 
t.Errorf("Worker nodes mismatch (-expected +actual):\n%s", diff) + } +}