diff --git a/go.mod b/go.mod index 50286e68b5..02c9dc0409 100644 --- a/go.mod +++ b/go.mod @@ -46,6 +46,7 @@ require ( github.com/spf13/cobra v1.9.1 github.com/spf13/pflag v1.0.6 github.com/stretchr/testify v1.10.0 + github.com/tidwall/gjson v1.14.2 github.com/tidwall/sjson v1.2.5 github.com/vincent-petithory/dataurl v1.0.0 github.com/vmware/govmomi v0.45.1 @@ -179,7 +180,6 @@ require ( github.com/sigstore/rekor v1.3.10 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/stoewer/go-strcase v1.3.0 // indirect - github.com/tidwall/gjson v1.14.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/timonwong/loggercheck v0.10.1 // indirect diff --git a/test/extended-priv/configmap.go b/test/extended-priv/configmap.go index c037d9c922..187f28f446 100644 --- a/test/extended-priv/configmap.go +++ b/test/extended-priv/configmap.go @@ -2,6 +2,7 @@ package extended import ( "encoding/json" + "fmt" o "github.com/onsi/gomega" exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" @@ -27,6 +28,24 @@ func NewConfigMapList(oc *exutil.CLI, namespace string) *ConfigMapList { return &ConfigMapList{ResourceList: *NewNamespacedResourceList(oc, "ConfigMap", namespace)} } +// GetDataValue returns the value of a specific key in the .data field, or an error if the key does not exist in the ConfigMap +func (cm *ConfigMap) GetDataValue(key string) (string, error) { + // We can't use the "resource.Get" method, because exutil.client will trim the output, removing spaces and newlines that could be important in a configuration. 
+ dataMap, err := cm.GetDataMap() + + if err != nil { + return "", err + } + + data, ok := dataMap[key] + if !ok { + return "", fmt.Errorf("Key %s does not exist in the .data in Configmap -n %s %s", + key, cm.GetNamespace(), cm.GetName()) + } + + return data, nil +} + // GetDataMap returns the valus in the .data field as a map[string][string] func (cm *ConfigMap) GetDataMap() (map[string]string, error) { data := map[string]string{} @@ -42,6 +61,16 @@ func (cm *ConfigMap) GetDataMap() (map[string]string, error) { return data, nil } +// GetDataValueOrFail returns the value of a specific key in the .data field and fails the test if any error happens (including a missing key) +func (cm *ConfigMap) GetDataValueOrFail(key string) string { + value, err := cm.GetDataValue(key) + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), + "Could not get the value for key %s in configmap -n %s %s", + key, cm.GetNamespace(), cm.GetName()) + + return value +} + // GetAll returns a []ConfigMap list with all existing pinnedimageset sorted by creation timestamp func (cml *ConfigMapList) GetAll() ([]ConfigMap, error) { cml.ResourceList.SortByTimestamp() diff --git a/test/extended-priv/const.go b/test/extended-priv/const.go index 69461ccb4b..7d65dc868c 100644 --- a/test/extended-priv/const.go +++ b/test/extended-priv/const.go @@ -3,6 +3,9 @@ package extended import "time" const ( + // IgnitionDefaultVersion is the default Ignition version + IgnitionDefaultVersion = "3.5.0" + // MachineConfigNamespace mco namespace MachineConfigNamespace = "openshift-machine-config-operator" // MachineConfigDaemon mcd container name @@ -27,7 +30,17 @@ const ( // LayeringBaseImageReleaseInfo is the name of the layering base image in release info LayeringBaseImageReleaseInfo = "rhel-coreos" - GenericMCTemplate = "generic-machine-config-template.yml" + // GenericMCTemplate is the name of a MachineConfig template that can be fully configured by parameters + GenericMCTemplate = "generic-machine-config-template.yml" + + // AWSPlatform value used to identify aws 
infrastructure + AWSPlatform = "aws" + // GCPPlatform value used to identify gcp infrastructure + GCPPlatform = "gcp" + // AzurePlatform value used to identify azure infrastructure + AzurePlatform = "azure" + // VspherePlatform value used to identify vSphere infrastructure + VspherePlatform = "vsphere" // ExpirationDockerfileLabel Expiration label in Dockerfile ExpirationDockerfileLabel = `LABEL maintainer="mco-qe-team" quay.expires-after=24h` @@ -48,4 +61,18 @@ const ( // DefaultExpectTimeout is the default timeout for expect operations DefaultExpectTimeout = 10 * time.Second + + // MachineAPINamespace is the MachineAPI namespace + MachineAPINamespace = "openshift-machine-api" + + // We use the fully qualified resource names to get machineset/machine. xref: https://access.redhat.com/solutions/7040368 + // MachineSetFullName is the machineset fully qualified name + MachineSetFullName = "machineset.machine.openshift.io" + // MachineFullName is the machine fully qualified name + MachineFullName = "machine.machine.openshift.io" + + // MachineSetResource is the resource name for machinesets + MachineSetResource = "machinesets" + // ControlPlaneMachineSetResource is the resource name for controlplanemachinesets + ControlPlaneMachineSetResource = "controlplanemachinesets" ) diff --git a/test/extended-priv/controlplanemachineset.go b/test/extended-priv/controlplanemachineset.go new file mode 100644 index 0000000000..29dec92d75 --- /dev/null +++ b/test/extended-priv/controlplanemachineset.go @@ -0,0 +1,384 @@ +package extended + +import ( + "context" + "fmt" + "strconv" + "time" + + o "github.com/onsi/gomega" + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + "github.com/openshift/machine-config-operator/test/extended-priv/util/architecture" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" + "github.com/tidwall/gjson" + "k8s.io/apimachinery/pkg/util/wait" + e2e "k8s.io/kubernetes/test/e2e/framework" +) + +const ( + // 
ControlPlaneMachineSetName is the name of the singleton ControlPlaneMachineSet resource + ControlPlaneMachineSetName = "cluster" +) + +// BootImageResource is an interface for resources that have boot images (MachineSet and ControlPlaneMachineSet) +type BootImageResource interface { + GetOC() *exutil.CLI + GetArchitectureOrFail() architecture.Architecture + GetCoreOsBootImage() (string, error) + String() string +} + +// ControlPlaneMachineSet is a struct to handle ControlPlaneMachineSet resources +type ControlPlaneMachineSet struct { + Resource +} + +// ControlPlaneMachineSetList is a struct to handle lists of ControlPlaneMachineSet resources +type ControlPlaneMachineSetList struct { + ResourceList +} + +// NewControlPlaneMachineSet constructs a new ControlPlaneMachineSet struct +func NewControlPlaneMachineSet(oc *exutil.CLI, namespace, name string) *ControlPlaneMachineSet { + return &ControlPlaneMachineSet{*NewNamespacedResource(oc, "controlplanemachineset", namespace, name)} +} + +// NewControlPlaneMachineSetList constructs a new ControlPlaneMachineSetList struct to handle all existing ControlPlaneMachineSets +func NewControlPlaneMachineSetList(oc *exutil.CLI, namespace string) *ControlPlaneMachineSetList { + return &ControlPlaneMachineSetList{*NewNamespacedResourceList(oc, "controlplanemachineset", namespace)} +} + +// GetState returns the .spec.state value of the ControlPlaneMachineSet (Active or Inactive) +func (cpms ControlPlaneMachineSet) GetState() (string, error) { + return cpms.Get(`{.spec.state}`) +} + +// GetStateOrFail returns the state of the ControlPlaneMachineSet and fails the test if any error happens +func (cpms ControlPlaneMachineSet) GetStateOrFail() string { + state, err := cpms.GetState() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting state from %s", cpms) + return state +} + +// IsActive returns true if the ControlPlaneMachineSet state is Active. If the state cannot be read, the error is logged and false is returned +func (cpms ControlPlaneMachineSet) IsActive() bool { + state, err := cpms.GetState() + if err != nil { + 
logger.Errorf("Error getting state: %s", err) + return false + } + return state == "Active" +} + +// GetReplicas returns the number of replicas configured in .spec.replicas +func (cpms ControlPlaneMachineSet) GetReplicas() (int, error) { + replicasStr, err := cpms.Get(`{.spec.replicas}`) + if err != nil { + return -1, err + } + return strconv.Atoi(replicasStr) +} + +// GetReplicasOrFail returns the number of replicas and fails the test if any error happens +func (cpms ControlPlaneMachineSet) GetReplicasOrFail() int { + replicas, err := cpms.GetReplicas() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting replicas from %s", cpms) + return replicas +} + +// GetReadyReplicas returns the number of ready replicas (.status.readyReplicas), or 0 if the field is empty +func (cpms ControlPlaneMachineSet) GetReadyReplicas() (int, error) { + readyReplicasStr, err := cpms.Get(`{.status.readyReplicas}`) + if err != nil { + return -1, err + } + if readyReplicasStr == "" { + return 0, nil + } + return strconv.Atoi(readyReplicasStr) +} + +// GetUpdatedReplicas returns the number of updated replicas (.status.updatedReplicas), or 0 if the field is empty +func (cpms ControlPlaneMachineSet) GetUpdatedReplicas() (int, error) { + updatedReplicasStr, err := cpms.Get(`{.status.updatedReplicas}`) + if err != nil { + return -1, err + } + if updatedReplicasStr == "" { + return 0, nil + } + return strconv.Atoi(updatedReplicasStr) +} + +// GetIsReady returns true if the configured, ready and updated replica counts are all equal. Any error reading them is logged and reported as not ready +func (cpms ControlPlaneMachineSet) GetIsReady() bool { + configuredReplicas, err := cpms.GetReplicas() + if err != nil { + logger.Infof("Cannot get configured replicas. Error: %s", err) + return false + } + + readyReplicas, err := cpms.GetReadyReplicas() + if err != nil { + logger.Infof("Cannot get ready replicas. Error: %s", err) + return false + } + + updatedReplicas, err := cpms.GetUpdatedReplicas() + if err != nil { + logger.Infof("Cannot get updated replicas. 
Error: %s", err) + return false + } + + logger.Infof("ConfiguredReplicas: %d, ReadyReplicas: %d, UpdatedReplicas: %d", configuredReplicas, readyReplicas, updatedReplicas) + + return configuredReplicas == readyReplicas && readyReplicas == updatedReplicas +} + +// WaitUntilReady polls GetIsReady every 20 seconds until it returns true or the given duration (in time.ParseDuration format) expires +func (cpms ControlPlaneMachineSet) WaitUntilReady(duration string) error { + pDuration, err := time.ParseDuration(duration) + if err != nil { + logger.Errorf("Error parsing duration %s. Error: %s", duration, err) + return err + } + + immediate := false + pollerr := wait.PollUntilContextTimeout(context.TODO(), 20*time.Second, pDuration, immediate, func(_ context.Context) (bool, error) { + return cpms.GetIsReady(), nil + }) + + return pollerr +} + +// GetCoreOsBootImage returns the configured coreOsBootImage in this ControlPlaneMachineSet. The field that is read depends on the cluster platform +func (cpms ControlPlaneMachineSet) GetCoreOsBootImage() (string, error) { + // the coreOs boot image is stored differently in the ControlPlaneMachineSet spec depending on the platform + coreOsBootImagePath := "" + switch p := exutil.CheckPlatform(cpms.oc); p { + case AWSPlatform: + coreOsBootImagePath = `{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.ami.id}` + case GCPPlatform: + // For GCP, dynamically find the boot disk index + bootDiskIndex, err := GCPGetControlPlaneMachinesetBootDiskIndex(cpms) + if err != nil { + return "", err + } + coreOsBootImagePath = fmt.Sprintf(`{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.disks[%d].image}`, bootDiskIndex) + case VspherePlatform: + coreOsBootImagePath = `{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.template}` + case AzurePlatform: + coreOsBootImagePath = `{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.image}` + default: + e2e.Failf("ControlPlaneMachineSet.GetCoreOsBootImage method is only supported for GCP, Vsphere, Azure and 
AWS infrastructure") + } + + return cpms.Get(coreOsBootImagePath) +} + +// GetCoreOsBootImageOrFail returns the configured coreOsBootImage in this ControlPlaneMachineSet and fails the test case if any error happened +func (cpms ControlPlaneMachineSet) GetCoreOsBootImageOrFail() string { + img, err := cpms.GetCoreOsBootImage() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting the coreos boot image value in %s", cpms) + return img +} + +// SetCoreOsBootImage sets the value of the configured coreos boot image by applying a JSON patch to the platform-specific providerSpec field +func (cpms ControlPlaneMachineSet) SetCoreOsBootImage(coreosBootImage string) error { + // the coreOs boot image is stored differently in the ControlPlaneMachineSet spec depending on the platform + patchCoreOsBootImagePath := "" + switch p := exutil.CheckPlatform(cpms.oc); p { + case AWSPlatform: + patchCoreOsBootImagePath = "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/ami/id" + case GCPPlatform: + // For GCP, dynamically find the boot disk index + bootDiskIndex, err := GCPGetControlPlaneMachinesetBootDiskIndex(cpms) + if err != nil { + return err + } + patchCoreOsBootImagePath = fmt.Sprintf("/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/disks/%d/image", bootDiskIndex) + case VspherePlatform: + patchCoreOsBootImagePath = "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/template" + case AzurePlatform: + patchCoreOsBootImagePath = "/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/image" + default: + e2e.Failf("ControlPlaneMachineSet.SetCoreOsBootImage method is only supported for GCP, Vsphere, Azure and AWS platforms") + } + + return cpms.Patch("json", fmt.Sprintf(`[{"op": "add", "path": "%s", "value": %s}]`, + patchCoreOsBootImagePath, QuoteIfNotJSON(coreosBootImage))) +} + +// GetArchitecture returns the architecture for this ControlPlaneMachineSet: AMD64 on vSphere, otherwise the architecture of the node linked to its first machine +func (cpms ControlPlaneMachineSet) GetArchitecture() 
(architecture.Architecture, error) { + platform := exutil.CheckPlatform(cpms.GetOC()) + if platform == VspherePlatform { + // In vsphere only the AMD64 architecture is supported + return architecture.AMD64, nil + } + + // Get machines created by the ControlPlaneMachineSet + machines, err := cpms.GetMachines() + if err != nil { + return architecture.UNKNOWN, err + } + + if len(machines) == 0 { + return architecture.UNKNOWN, fmt.Errorf("ControlPlaneMachineSet %s has no machines, so we cannot get the architecture from any existing machine", cpms.GetName()) + } + + // Get the node associated with the first machine + node, err := machines[0].GetNode() + if err != nil { + return architecture.UNKNOWN, err + } + + return node.GetArchitecture() +} + +// GetArchitectureOrFail returns the architecture and fails the test if any error happens +func (cpms ControlPlaneMachineSet) GetArchitectureOrFail() architecture.Architecture { + arch, err := cpms.GetArchitecture() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting the architecture in %s", cpms) + return arch +} + +// GetUserDataSecretName returns the name of the secret used for user-data +func (cpms ControlPlaneMachineSet) GetUserDataSecretName() (string, error) { + return cpms.Get(`{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.userDataSecret.name}`) +} + +// GetUserDataSecret returns a Secret struct for the user-data secret, looked up in the MachineAPI namespace +func (cpms ControlPlaneMachineSet) GetUserDataSecret() (*Secret, error) { + secretName, err := cpms.GetUserDataSecretName() + if err != nil { + return nil, err + } + return NewSecret(cpms.GetOC(), MachineAPINamespace, secretName), nil +} + +// SetUserDataSecret configures the ControlPlaneMachineSet to use the provided user-data secret in the machine-api namespace +func (cpms ControlPlaneMachineSet) SetUserDataSecret(userDataSecretName string) error { + return cpms.Patch("json", `[{ "op": "replace", "path": 
"/spec/template/machines_v1beta1_machine_openshift_io/spec/providerSpec/value/userDataSecret/name", "value": "`+userDataSecretName+`" }]`) +} + +// GetMachines returns a slice with the control plane machines (selected by the master role and type labels) in the namespace of this ControlPlaneMachineSet, sorted by timestamp +func (cpms ControlPlaneMachineSet) GetMachines() ([]Machine, error) { + ml := NewMachineList(cpms.oc, cpms.GetNamespace()) + ml.ByLabel("machine.openshift.io/cluster-api-machine-role=master") + ml.ByLabel("machine.openshift.io/cluster-api-machine-type=master") + ml.SortByTimestamp() + return ml.GetAll() +} + +// GetMachinesOrFail returns the machines of this ControlPlaneMachineSet and fails the test, reporting the caller's location, if any error occurred +func (cpms ControlPlaneMachineSet) GetMachinesOrFail() []Machine { + ml, err := cpms.GetMachines() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Get machines of ControlPlaneMachineSet %s failed", cpms.GetName()) + return ml +} + +// GetNodes returns a slice with all nodes that have been created for this ControlPlaneMachineSet +func (cpms ControlPlaneMachineSet) GetNodes() ([]*Node, error) { + machines, mErr := cpms.GetMachines() + if mErr != nil { + return nil, mErr + } + + nodes := []*Node{} + for _, m := range machines { + n, nErr := m.GetNode() + if nErr != nil { + return nil, nErr + } + + nodes = append(nodes, n) + } + return nodes, nil +} + +// GetNodesOrFail returns a slice with all nodes that have been created for this ControlPlaneMachineSet and fails the test if any error happens +func (cpms ControlPlaneMachineSet) GetNodesOrFail() []*Node { + nodes, err := cpms.GetNodes() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting the nodes that belong to %s", cpms) + return nodes +} + +// GetAll returns a []ControlPlaneMachineSet list with all existing ControlPlaneMachineSets +func (cpmsl *ControlPlaneMachineSetList) GetAll() ([]ControlPlaneMachineSet, error) { + allCPMSResources, err := cpmsl.ResourceList.GetAll() + if err != nil { + return nil, err + } + allCPMS := make([]ControlPlaneMachineSet, 0, 
len(allCPMSResources)) + + for _, cpmsRes := range allCPMSResources { + allCPMS = append(allCPMS, *NewControlPlaneMachineSet(cpmsl.oc, cpmsRes.GetNamespace(), cpmsRes.GetName())) + } + + return allCPMS, nil +} + +// GetAllOrFail returns a []ControlPlaneMachineSet list with all existing ControlPlaneMachineSets and fail the test if it is not possible +func (cpmsl *ControlPlaneMachineSetList) GetAllOrFail() []ControlPlaneMachineSet { + allCpms, err := cpmsl.GetAll() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error getting the list of existing ControlPlaneMachineSets") + + return allCpms +} + +// DeleteOneMachineAndWaitForRecreation deletes one machine from the ControlPlaneMachineSet and waits for a new one to be created and ready +// This function validates that the CPMS correctly recreates deleted machines +func DeleteOneMachineAndWaitForRecreation(cpms *ControlPlaneMachineSet) { + machines := cpms.GetMachinesOrFail() + o.Expect(machines).To(o.HaveLen(3), "Expected 3 control plane machines") + + machineToDelete := machines[0] + logger.Infof("Deleting machine %s", machineToDelete.GetName()) + o.Expect(machineToDelete.Delete("--wait=false")).To(o.Succeed(), "Error deleting machine %s", machineToDelete) + + // Wait for the machine to be deleted + o.Eventually(machineToDelete.Exists, "30m", "30s").Should(o.BeFalse(), + "Machine %s was not deleted", machineToDelete) + logger.Infof("Machine %s deleted", machineToDelete.GetName()) + + // Wait for the machines to be created and ready. Only gm assertions are used inside the polled function so that a transient error is retried instead of aborting the test + o.Eventually(func(gm o.Gomega) { + currentMachines, err := cpms.GetMachines() + gm.Expect(err).NotTo(o.HaveOccurred(), "Error getting the controlplane machines") + gm.Expect(currentMachines).To(o.HaveLen(len(machines)), + "Wrong number of controlplane machines") + + // Check all machines are ready + for _, m := range currentMachines { + gm.Expect(m.IsRunning()).To(o.BeTrue(), + "Machine %s is not running yet", m.GetName()) + } + }, "5m", "30s").Should(o.Succeed(), "New machine was not created or is not ready") + logger.Infof("New 
machine created and ready") + + // Wait for ControlPlaneMachineSet to be ready + o.Eventually(cpms.GetIsReady, "5m", "30s").Should(o.BeTrue(), + "ControlPlaneMachineSet %s is not ready after machine replacement", cpms.GetName()) +} + +// GCPGetControlPlaneMachinesetBootDiskIndex returns the index of the boot disk in the GCP disks array, or -1 and an error if the disks cannot be read or none is a boot disk +// In GCP, the boot disk is identified by the "boot: true" field in the disk specification +func GCPGetControlPlaneMachinesetBootDiskIndex(cpms ControlPlaneMachineSet) (int, error) { + // Get the disks array from the ControlPlaneMachineSet spec + disksJSON, err := cpms.Get(`{.spec.template.machines_v1beta1_machine_openshift_io.spec.providerSpec.value.disks}`) + if err != nil { + return -1, fmt.Errorf("Failed to get disks array: %w", err) + } + + // Parse the disks array and find the boot disk + disks := gjson.Parse(disksJSON).Array() + for i, disk := range disks { + if disk.Get("boot").Bool() { + logger.Infof("Found boot disk at index %d", i) + return i, nil + } + } + + return -1, fmt.Errorf("No boot disk found in disks array (no disk with boot: true)") +} diff --git a/test/extended-priv/gomega_json_matcher.go b/test/extended-priv/gomega_json_matcher.go new file mode 100644 index 0000000000..f4431f9ffc --- /dev/null +++ b/test/extended-priv/gomega_json_matcher.go @@ -0,0 +1,123 @@ +package extended + +import ( + "fmt" + + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" + "github.com/tidwall/gjson" + + gomegamatchers "github.com/onsi/gomega/matchers" + "github.com/onsi/gomega/types" +) + +// How to use this matcher +// +// json := ` +// { +// "name": "Alice", +// "age": 30, +// "height": 1.68, +// "skills": ["Go", "Python", "JavaScript"], +// "scores": [95, 88, 76, 100], +// "contact": { +// "email": "alice@example.com", +// "phone": "+1-202-555-0184" +// }, +// "projects": [ +// { +// "title": "Inventory App", +// "lines_of_code": 2500, +// "technologies": ["Go", "React"] +// }, +// { +// 
"title": "Data Pipeline", +// "lines_of_code": 4300, +// "technologies": ["Python", "Airflow"] +// } +// ], +// "settings": { +// "notifications": true, +// "theme": "dark", +// "preferred_numbers": [3, 7, 42] +// }, +// "extra": [] +// } +// +// ` +// +// o.Expect(json).To(HavePathWithValue("age", float64(30))) +// o.Expect(json).To(HavePathWithValue("age", o.BeEquivalentTo(30))) +// o.Expect(json).To(HavePathWithValue("age", o.BeNumerically("==", 30) )) +// o.Expect(json).To(HavePathWithValue("name", "Alice")) +// o.Expect(json).To(HavePathWithValue("height", float64(1.68))) +// o.Expect(json).To(HavePathWithValue("height", o.BeNumerically(">", 1) )) +// o.Expect(json).To(HavePathWithValue("skills", []interface{}{"Go", "Python", "JavaScript"})) +// o.Expect(json).To(HavePathWithValue("skills", o.ConsistOf("Go", "Python", "JavaScript"))) +// o.Expect(json).To(HavePathWithValue("scores", []interface{}{float64(95), 88.0, 76.0, 100.0})) +// o.Expect(json).To(HavePathWithValue("scores", o.ConsistOf(95.0, float64(88), o.BeEquivalentTo(76), o.BeEquivalentTo(100)))) +// o.Expect(json).To(HavePathWithValue("skills", o.HaveLen(3))) +// o.Expect(json).To(HavePathWithValue("scores", o.ContainElement(float64(88)))) +// o.Expect(json).To(HavePathWithValue("scores", o.ContainElements(88.0, 100.0))) +// o.Expect(json).To(HavePathWithValue("extra", o.HaveLen(0))) +// o.Expect(json).To(HavePathWithValue("settings.notifications", o.BeTrue())) + +// gjsonMatcher is a struct implementing the gomega matcher interface +type gjsonMatcher struct { + path string + data interface{} + expected interface{} + expectedMatcher types.GomegaMatcher +} + +// Match checks if the value found at the configured path of the given JSON satisfies the expected value or matcher. The actual value must be a string containing valid JSON. 
+func (matcher *gjsonMatcher) Match(actual interface{}) (success bool, err error) { + + // Check that the checked value is a string + strJSON, ok := actual.(string) + logger.Debugf("Matched JSON: %s", strJSON) + if !ok { + return false, fmt.Errorf(`Wrong type. Matcher expects a type "string": %v`, actual) + } + + if !gjson.Valid(strJSON) { + return false, fmt.Errorf(`Wrong format. The string is not a valid JSON: %s`, strJSON) + } + data := gjson.Get(strJSON, matcher.path) + if !data.Exists() { + return false, fmt.Errorf(`The matched path %s does not exist in the provided JSON: %s`, matcher.path, strJSON) + } + matcher.data = data.Value() + + // Guess if we provided a value or another matcher in order to check the condition. Plain values are compared with an Equal matcher + var isMatcher bool + matcher.expectedMatcher, isMatcher = matcher.expected.(types.GomegaMatcher) + if !isMatcher { + matcher.expectedMatcher = &gomegamatchers.EqualMatcher{Expected: matcher.expected} + } + + return matcher.expectedMatcher.Match(matcher.data) +} + +// FailureMessage returns the message when testing `Should` case and `Match` returned false +func (matcher *gjsonMatcher) FailureMessage(actual interface{}) (message string) { + // The type was already validated in Match, we can safely ignore the error + strJSON, _ := actual.(string) + message = fmt.Sprintf("%s\n, the matcher was not satisfied by the path %s in json %s", + matcher.expectedMatcher.FailureMessage(matcher.data), matcher.path, strJSON) + return message +} + +// NegatedFailureMessage returns the message when testing `ShouldNot` case and `Match` returned true +func (matcher *gjsonMatcher) NegatedFailureMessage(actual interface{}) (message string) { + // The type was already validated in Match, we can safely ignore the error + strJSON, _ := actual.(string) + message = fmt.Sprintf("%s\n, the matcher was satisfied (but it should NOT) by the path %s in json %s", + matcher.expectedMatcher.NegatedFailureMessage(matcher.data), matcher.path, strJSON) + + return message +} + +// 
HavePathWithValue returns the gomega matcher to check if a path in a json data matches the given condition. The condition can be a plain value or another gomega matcher +func HavePathWithValue(path string, expected interface{}) types.GomegaMatcher { + return &gjsonMatcher{path: path, expected: expected} +} diff --git a/test/extended-priv/machine.go b/test/extended-priv/machine.go new file mode 100644 index 0000000000..404a219886 --- /dev/null +++ b/test/extended-priv/machine.go @@ -0,0 +1,91 @@ +package extended + +import ( + "fmt" + + o "github.com/onsi/gomega" + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" +) + +// Machine is a struct to handle Machine resources +type Machine struct { + Resource +} + +// MachineList is a struct to handle lists of Machine resources +type MachineList struct { + ResourceList +} + +// NewMachine constructs a new Machine struct +func NewMachine(oc *exutil.CLI, namespace, name string) *Machine { + return &Machine{*NewNamespacedResource(oc, MachineFullName, namespace, name)} +} + +// GetNode returns the node whose machine.openshift.io/machine annotation is "openshift-machine-api/<machine name>". It returns an error if zero or more than one node matches +func (m Machine) GetNode() (*Node, error) { + nodeList := NewNodeList(m.oc) + nodeList.SetItemsFilter(`?(@.metadata.annotations.machine\.openshift\.io/machine=="openshift-machine-api/` + m.GetName() + `")`) + nodes, nErr := nodeList.GetAll() + if nErr != nil { + return nil, nErr + } + numNodes := len(nodes) + if numNodes > 1 { + return nil, fmt.Errorf("More than one nodes linked to this Machine. Machine: %s. Num nodes:%d", + m.GetName(), numNodes) + } + + if numNodes == 0 { + return nil, fmt.Errorf("No node linked to this Machine. 
Machine: %s", m.GetName()) + } + + return &(nodes[0]), nil +} + +// GetNodeOrFail calls GetNode and fails the test, reporting the caller's location, if any error occurred +func (m Machine) GetNodeOrFail() *Node { + node, err := m.GetNode() + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Get node from machine %s failed", m.GetName()) + return node +} + +// GetPhase returns the phase of the machine (.status.phase) +func (m Machine) GetPhase() (string, error) { + phase, err := m.Get(`{.status.phase}`) + if err != nil { + return "", err + } + logger.Infof("machine %s phase is %s", m.GetName(), phase) + return phase, nil +} + +// IsRunning returns true if the machine phase is "Running" +func (m Machine) IsRunning() (bool, error) { + phase, err := m.GetPhase() + if err != nil { + return false, err + } + return phase == "Running", nil +} + +// NewMachineList constructs a new MachineList struct to handle all existing Machines +func NewMachineList(oc *exutil.CLI, namespace string) *MachineList { + return &MachineList{*NewNamespacedResourceList(oc, MachineFullName, namespace)} +} + +// GetAll returns a []Machine slice with all existing machines +func (ml MachineList) GetAll() ([]Machine, error) { + allMResources, err := ml.ResourceList.GetAll() + if err != nil { + return nil, err + } + allMs := make([]Machine, 0, len(allMResources)) + + for _, mRes := range allMResources { + allMs = append(allMs, *NewMachine(ml.oc, mRes.GetNamespace(), mRes.GetName())) + } + + return allMs, nil +} diff --git a/test/extended-priv/machineconfigpool.go b/test/extended-priv/machineconfigpool.go index a8b6a29352..e54b410eb4 100644 --- a/test/extended-priv/machineconfigpool.go +++ b/test/extended-priv/machineconfigpool.go @@ -600,7 +600,11 @@ func (mcp *MachineConfigPool) RecoverFromDegraded() error { mcpNodes, _ := mcp.GetNodes() for _, node := range mcpNodes { logger.Infof("Restoring desired config in node: %s", node) - if node.IsUpdated() { + isUpdated, err := node.IsUpdated() + if err != nil { + return fmt.Errorf("Error checking if node %s is updated: %s", 
node.GetName(), err) + } + if isUpdated { logger.Infof("node is updated, don't need to recover") } else { err := node.RestoreDesiredConfig() @@ -814,9 +818,13 @@ func DebugDegradedStatus(mcp *MachineConfigPool) { allNodes, err := nodeList.GetAll() if err == nil { for _, node := range allNodes { - state := node.GetMachineConfigState() - if state != "Done" { - logger.Infof("NODE %s IS %s", node.GetName(), state) + state, err := node.GetMachineConfigState() + if state != "Done" || err != nil { + if err != nil { + logger.Infof("Error getting machine config state for node %s: %v", node.GetName(), err) + } else { + logger.Infof("NODE %s IS %s", node.GetName(), state) + } logger.Infof("%s", node.PrettyString()) logger.Infof("#######################\n\n") mcdLogs, err := node.GetMCDaemonLogs("") diff --git a/test/extended-priv/machineconfiguration.go b/test/extended-priv/machineconfiguration.go new file mode 100644 index 0000000000..a5ecc8a6be --- /dev/null +++ b/test/extended-priv/machineconfiguration.go @@ -0,0 +1,50 @@ +package extended + +import ( + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" +) + +// MachineConfiguration struct is used to handle MachineConfiguration resources in OCP +type MachineConfiguration struct { + Resource +} + +// GetMachineConfiguration returns the "cluster" MachineConfiguration resource. It is the only MachineConfiguration resource that can be used +func GetMachineConfiguration(oc *exutil.CLI) *MachineConfiguration { + return &MachineConfiguration{Resource: *NewResource(oc, "machineconfiguration", "cluster")} +} + +// RemoveManagedBootImagesConfig removes .spec.managedBootImages from the MachineConfiguration resource. It does nothing if the field is not set, and returns an error if the resource cannot be read or patched. 
+func (mc MachineConfiguration) RemoveManagedBootImagesConfig() error { + logger.Infof("Removing .spec.managedBootImages from %s", mc) + managedBootImages, err := mc.Get(`{.spec.managedBootImages}`) + if err != nil { + return err + } + if managedBootImages == "" { + logger.Infof(".spec.managedBootImages does not exist. No need to remove it") + return nil + } + return mc.Patch("json", `[{ "op": "remove", "path": "/spec/managedBootImages"}]`) +} + +// SetAllManagedBootImagesConfig configures MachineConfiguration with selection mode "All" for the given machine.openshift.io resource, so that all of them are updated if necessary +func (mc MachineConfiguration) SetAllManagedBootImagesConfig(resource string) error { + return mc.Patch("merge", `{"spec":{"managedBootImages":{"machineManagers":[{"resource": "`+resource+`","apiGroup": "machine.openshift.io","selection": {"mode": "All"}}]}}}`) +} + +// SetPartialManagedBootImagesConfig configures MachineConfiguration with selection mode "Partial" so that only the given resources with the given label are updated if necessary. If both label and value are empty, an empty matchLabels selector is used +func (mc MachineConfiguration) SetPartialManagedBootImagesConfig(resource, label, value string) error { + + if label == "" && value == "" { + return mc.Patch("merge", `{"spec":{"managedBootImages":{"machineManagers":[{"resource":"`+resource+`","apiGroup":"machine.openshift.io","selection":{"mode":"Partial","partial":{"machineResourceSelector":{"matchLabels":{}}}}}]}}}`) + } + + return mc.Patch("merge", `{"spec":{"managedBootImages":{"machineManagers":[{"resource":"`+resource+`","apiGroup":"machine.openshift.io","selection":{"mode":"Partial","partial":{"machineResourceSelector":{"matchLabels":{"`+label+`":"`+value+`"}}}}}]}}}`) +} + +// SetNoneManagedBootImagesConfig configures MachineConfiguration with selection mode "None" for the given machine.openshift.io resource, so that none of them are updated +func (mc MachineConfiguration) SetNoneManagedBootImagesConfig(resource string) error { + return mc.Patch("merge", `{"spec":{"managedBootImages":{"machineManagers":[{"resource": "`+resource+`","apiGroup": "machine.openshift.io","selection": {"mode": "None"}}]}}}`) +} diff --git 
a/test/extended-priv/machineset.go b/test/extended-priv/machineset.go new file mode 100644 index 0000000000..9ff26b0dea --- /dev/null +++ b/test/extended-priv/machineset.go @@ -0,0 +1,121 @@ +package extended + +import ( + "fmt" + + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// MachineSet struct to handle MachineSet resources +type MachineSet struct { + Resource +} + +// MachineSetList struct to handle lists of MachineSet resources +type MachineSetList struct { + ResourceList +} + +// NewMachineSet constructs a new MachineSet struct +func NewMachineSet(oc *exutil.CLI, namespace, name string) *MachineSet { + return &MachineSet{*NewNamespacedResource(oc, MachineSetFullName, namespace, name)} +} + +// NewMachineSetList constructs a new MachineSetList struct +func NewMachineSetList(oc *exutil.CLI, namespace string) *MachineSetList { + return &MachineSetList{*NewNamespacedResourceList(oc, MachineSetFullName, namespace)} +} + +// GetAll returns a []MachineSet list with all existing MachineSets +func (msl *MachineSetList) GetAll() ([]MachineSet, error) { + allMachineSetResources, err := msl.ResourceList.GetAll() + if err != nil { + return nil, err + } + allMachineSets := make([]MachineSet, 0, len(allMachineSetResources)) + + for _, msRes := range allMachineSetResources { + allMachineSets = append(allMachineSets, *NewMachineSet(msl.oc, msRes.GetNamespace(), msRes.GetName())) + } + + return allMachineSets, nil +} + +// convertUserDataToNewVersion converts the provided userData ignition config into the provided version format +// +//nolint:unparam // newIgnitionVersion is always "2.2.0", but kept for flexibility +func convertUserDataToNewVersion(userData, newIgnitionVersion string) (string, error) { + var err error + + currentIgnitionVersionResult := gjson.Get(userData, "ignition.version") + 
if !currentIgnitionVersionResult.Exists() || currentIgnitionVersionResult.String() == "" { + logger.Debugf("Could not get ignition version from ignition userData: %s", userData) + return "", fmt.Errorf("Could not get ignition version from ignition userData. Enable debug GINKGO_TEST_ENABLE_DEBUG_LOG to get more info") + } + currentIgnitionVersion := currentIgnitionVersionResult.String() + + if CompareVersions(currentIgnitionVersion, "==", newIgnitionVersion) { + logger.Infof("Current ignition version %s is the same as the new ignition version %s. No need to manipulate the userData info", + currentIgnitionVersion, newIgnitionVersion) + } else { + if CompareVersions(newIgnitionVersion, "<", "3.0.0") { + logger.Infof("New ignition version is %s, we need to adapt the userData ignition config to 2.0 config", newIgnitionVersion) + userData, err = ConvertUserDataIgnition3ToIgnition2(userData) + if err != nil { + return "", err + } + } + logger.Infof("Replace ignition version '%s' with version '%s'", currentIgnitionVersion, newIgnitionVersion) + userData, err = sjson.Set(userData, "ignition.version", newIgnitionVersion) + if err != nil { + return "", err + } + } + + return userData, nil +} + +// ConvertUserDataIgnition3ToIgnition2 transforms an ignitionV3 userdata configuration into an ignitionV2 userdata configuration +// IMPORTANT: If the ignition config includes storage or systemd they will be deleted. Don't expect this configuration to actually work +// +// The resulting 2.2.0 ignition config is only for testing purposes. If it is needed to actually transform the ignition version to +// 2.2.0 then this function needs to be modified. 
+func ConvertUserDataIgnition3ToIgnition2(ignition3 string) (string, error) { + var ( + ignition2 = ignition3 + err error + ) + logger.Infof("Replace the 'merge' action with the 'append' action") + merge := gjson.Get(ignition3, "ignition.config.merge") + if !merge.Exists() { + logger.Debugf("Could not find the 'merge' information in the ignition3 ignition config: %s", ignition3) + return "", fmt.Errorf("Could not find the 'merge' information in the ignition3 ignition config. Enable debug GINKGO_TEST_ENABLE_DEBUG_LOG to get more info") + } + ignition2, err = sjson.SetRaw(ignition2, "ignition.config.append", merge.String()) + if err != nil { + return "", err + } + + logger.Infof("Delete ignition.config.merge field") + ignition2, err = sjson.Delete(ignition2, "ignition.config.merge") + if err != nil { + return "", err + } + + logger.Infof("Delete storage field to create stub ignition config") + ignition2, err = sjson.Delete(ignition2, "storage") + if err != nil { + return "", err + } + + logger.Infof("Delete systemd field to create stub ignition config") + ignition2, err = sjson.Delete(ignition2, "systemd") + if err != nil { + return "", err + } + + return ignition2, nil +} diff --git a/test/extended-priv/mco.go b/test/extended-priv/mco.go index 6df0def8f4..0f30517759 100644 --- a/test/extended-priv/mco.go +++ b/test/extended-priv/mco.go @@ -1,6 +1,11 @@ package extended import ( + "fmt" + "strings" + + "github.com/Masterminds/semver/v3" + g "github.com/onsi/ginkgo/v2" o "github.com/onsi/gomega" exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" @@ -66,3 +71,33 @@ func checkDegraded(mcp *MachineConfigPool, expectedMessage, expectedReason, degr } logger.Infof("OK!\n") } + +func skipTestIfRHELVersion(node Node, operator, constraintVersion string) { + actualVersion, err := node.GetRHELVersion() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting RHEL version 
from node %s", node.GetName()) + + // Pad version to semantic version format if needed (e.g., "9.6" -> "9.6.0") + parts := strings.Split(actualVersion, ".") + for len(parts) < 3 { + parts = append(parts, "0") + } + paddedVersion := strings.Join(parts, ".") + + // Pad constraint version as well + constraintParts := strings.Split(constraintVersion, ".") + for len(constraintParts) < 3 { + constraintParts = append(constraintParts, "0") + } + paddedConstraintVersion := strings.Join(constraintParts, ".") + + // Parse versions for comparison + constraint, err := semver.NewConstraint(operator + paddedConstraintVersion) + o.Expect(err).NotTo(o.HaveOccurred(), "Error parsing version constraint") + + actual, err := semver.NewVersion(paddedVersion) + o.Expect(err).NotTo(o.HaveOccurred(), "Error parsing actual version %s (padded from %s)", paddedVersion, actualVersion) + + if constraint.Check(actual) { + g.Skip(fmt.Sprintf("Test requires RHEL version NOT %s %s, but node has %s", operator, constraintVersion, actualVersion)) + } +} diff --git a/test/extended-priv/mco_bootimages.go b/test/extended-priv/mco_bootimages.go new file mode 100644 index 0000000000..aa1c018666 --- /dev/null +++ b/test/extended-priv/mco_bootimages.go @@ -0,0 +1,202 @@ +package extended + +import ( + "bytes" + "fmt" + "os/exec" + + o "github.com/onsi/gomega" + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + "github.com/openshift/machine-config-operator/test/extended-priv/util/architecture" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" + "github.com/tidwall/gjson" + e2e "k8s.io/kubernetes/test/e2e/framework" +) + +// getCoreOsBootImageFromConfigMap retrieves the boot image from the coreos-bootimages ConfigMap for the given platform and architecture +func getCoreOsBootImageFromConfigMap(platform, region string, arch architecture.Architecture, coreosBootimagesCM *ConfigMap) (string, error) { + var coreOsBootImagePath string + stringArch 
:= arch.GNUString() + + logger.Infof("Looking for coreos boot image for architecture %s in %s", stringArch, coreosBootimagesCM) + + streamJSON, err := coreosBootimagesCM.GetDataValue("stream") + if err != nil { + return "", err + } + parsedStream := gjson.Parse(streamJSON) + + switch platform { + case AWSPlatform: + if region == "" { + return "", fmt.Errorf("Region is empty for platform %s. The region is mandatory if we want to get the boot image value", platform) + } + coreOsBootImagePath = fmt.Sprintf(`architectures.%s.images.%s.regions.%s.image`, stringArch, platform, region) + case GCPPlatform: + coreOsBootImagePath = fmt.Sprintf(`architectures.%s.images.%s.name`, stringArch, platform) + case VspherePlatform: + // There is no such thing as a "bootimage in vsphere", we need to manually upload it always. We return the version instead, since it is the only info we can use to verify the bootimage + // in vsphere platform, the key is "vmware" and not "vsphere" + coreOsBootImagePath = fmt.Sprintf(`architectures.%s.artifacts.%s.release`, stringArch, "vmware") + case AzurePlatform: + coreOsBootImagePath = fmt.Sprintf(`architectures.%s.rhel-coreos-extensions.marketplace.%s.no-purchase-plan.hyperVGen2`, stringArch, "azure") + default: + return "", fmt.Errorf("Machineset.GetCoreOsBootImage method is only supported for GCP, Vsphere, Azure, and AWS platforms") + } + + currentCoreOsBootImage := parsedStream.Get(coreOsBootImagePath).String() + + if currentCoreOsBootImage == "" { + logger.Warnf("The coreos boot image for architecture %s in %s IS EMPTY. 
ImagePath: %s", stringArch, coreosBootimagesCM, coreOsBootImagePath) + } + + return currentCoreOsBootImage, nil +} + +// getCoreOsBootImageFromConfigMapOrFail gets the boot image and fails the test if there's an error +func getCoreOsBootImageFromConfigMapOrFail(platform, region string, arch architecture.Architecture, coreosBootimagesCM *ConfigMap) string { + image, err := getCoreOsBootImageFromConfigMap(platform, region, arch, coreosBootimagesCM) + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the boot image from %s for platform %s and arch %s", coreosBootimagesCM, platform, arch) + return image +} + +// GetValidUpdateBootImageValue returns a valid boot image value for testing based on platform +// MCO will only update images previously published in the installer. This function returns one of those valid images +func GetValidUpdateBootImageValue(oc *exutil.CLI) string { + var ( + platform = exutil.CheckPlatform(oc) + ) + + switch platform { + case AWSPlatform: + // MCO will only update AMIS present in the list defined here https://github.com/openshift/machine-config-operator/pull/5122 + // We choose one of them + return "ami-0ffec236307e00b94" + case GCPPlatform: + // In GCP all images located in projects/rhcos-cloud/global/images are considered valid for update + return "projects/rhcos-cloud/global/images" + "/updateble-fake-image" + case AzurePlatform: + // In Azure we need to configure the whole image, not only one field. We need an image in resourceID and an empty sku field + // We use a similar resourceID as the one generated in a normal installation. 
Note that it contains "gen2", so it should use "hyperVGen2" + return `{"offer":"","publisher":"","resourceID":"/resourceGroups/fake-499nn-rg/providers/Microsoft.Compute/galleries/gallery_fake21az_499nn/images/fake-499nn-gen2/versions/latest","sku":"","version":""}` + case VspherePlatform: + // In Vsphere we need the image to be present in the vcenter, so we need to manually upload it + var ( + // We will use 4.16 as the original version that will be updated to the current version + imageVersion = "4.16" + // Vsphere only support AMD64 + arch = architecture.AMD64 + ) + + // Get the right base image name from the rhcos json info stored in the github repositories + exutil.By(fmt.Sprintf("Get the base image for version %s", imageVersion)) + rhcosHandler, err := GetRHCOSHandler(platform) + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the rhcos handler") + + baseImage, err := rhcosHandler.GetBaseImageFromRHCOSImageInfo(imageVersion, arch, "") + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the base image") + logger.Infof("Using base image %s", baseImage) + + baseImageURL, err := rhcosHandler.GetBaseImageURLFromRHCOSImageInfo(imageVersion, arch) + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the base image URL") + + // To avoid collisions we will add prefix to identify our image + baseImage = "mcotest-" + baseImage + o.Expect( + uploadBaseImageToCloud(oc, platform, baseImageURL, baseImage), + ).To(o.Succeed(), "Error uploading the base image %s to the cloud", baseImageURL) + logger.Infof("Uplodated: %s", baseImage) + logger.Infof("OK!\n") + + return baseImage + default: + return "" + } +} + +// getReleaseFromVsphereTemplate gets the release version from a vSphere template +func getReleaseFromVsphereTemplate(oc *exutil.CLI, vsphereTemplate string) (string, error) { + + var ( + execBin = "govc" + vmInfoCommand = []string{"vm.info", "-json", vsphereTemplate} + stderr bytes.Buffer + ) + + server, dataCenter, dataStore, resourcePool, user, password, err 
:= getvSphereCredentials(oc.AsAdmin()) + if err != nil { + return "", err + } + + govcExecEnv := getGovcEnv(server, dataCenter, dataStore, resourcePool, user, password) + + logger.Infof("Getting information about vsphere template %s", vsphereTemplate) + logger.Infof("%s %s", execBin, vmInfoCommand) + + vmInfoCmd := exec.Command(execBin, vmInfoCommand...) + vmInfoCmd.Stderr = &stderr + vmInfoCmd.Env = govcExecEnv + + vmInfo, err := vmInfoCmd.Output() + if err != nil { + logger.Errorf("Output: %s", string(vmInfo)) + logger.Errorf("Stderr: %s", stderr.String()) + return "", err + } + + gVersion := gjson.Get(string(vmInfo), "virtualMachines.0.summary.config.product.version") + if !gVersion.Exists() { + return "", fmt.Errorf("Cannot get config from vm info: %s", vmInfoCmd) + } + + version := gVersion.String() + logger.Infof("Version for vm %s: %s", vsphereTemplate, version) + return version, nil +} + +// CheckCurrentOSImageIsUpdated checks that the machineset/controlplanemachineset is using the bootimage expected in the current cluster version +func CheckCurrentOSImageIsUpdated(bir BootImageResource) { + var ( + oc = bir.GetOC() + platform = exutil.CheckPlatform(oc) + region = getCurrentRegionOrFail(oc) + arch = bir.GetArchitectureOrFail() + coreosBootimagesCM = NewConfigMap(oc.AsAdmin(), MachineConfigNamespace, "coreos-bootimages") + ) + + currentCoreOsBootImage := getCoreOsBootImageFromConfigMapOrFail(platform, region, arch, coreosBootimagesCM) + logger.Infof("Current coreOsBootImage: %s", currentCoreOsBootImage) + o.Expect(currentCoreOsBootImage).NotTo(o.BeEmpty(), "Could not find the right coreOS image for this platform") + + switch platform { + case AWSPlatform, GCPPlatform: + o.Eventually(bir.GetCoreOsBootImage, "5m", "20s").Should(o.ContainSubstring(currentCoreOsBootImage), + "%s was NOT updated to use the right boot image", bir) + case VspherePlatform: + o.Eventually(func() (string, error) { + bootImage, err := bir.GetCoreOsBootImage() + if err != nil { + return 
"", err + } + return getReleaseFromVsphereTemplate(oc.AsAdmin(), bootImage) + }, "5m", "20s"). + Should(o.Equal(currentCoreOsBootImage), "The image used to update %s doen't have the right version", bir) + case AzurePlatform: + parsedImage := gjson.Parse(currentCoreOsBootImage) + sku := parsedImage.Get("sku").String() + version := parsedImage.Get("version").String() + offer := parsedImage.Get("offer").String() + publisher := parsedImage.Get("publisher").String() + + o.Eventually(bir.GetCoreOsBootImage, "5m", "20s").Should(o.And( + HavePathWithValue("publisher", o.Equal(publisher)), + HavePathWithValue("offer", o.Equal(offer)), + HavePathWithValue("sku", o.Equal(sku)), + HavePathWithValue("version", o.Equal(version)), + HavePathWithValue("resourceID", o.BeEmpty()), + HavePathWithValue("type", o.Equal("MarketplaceNoPlan"))), + "%s was NOT updated to use the right boot image", bir) + default: + e2e.Failf("Platform not supported in CheckCurrentOSImageIsUpdated: %s", platform) + } +} diff --git a/test/extended-priv/mco_controlplanemachineset.go b/test/extended-priv/mco_controlplanemachineset.go new file mode 100644 index 0000000000..cd41a02db8 --- /dev/null +++ b/test/extended-priv/mco_controlplanemachineset.go @@ -0,0 +1,367 @@ +package extended + +import ( + "strings" + + g "github.com/onsi/ginkgo/v2" + o "github.com/onsi/gomega" + exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" +) + +var _ = g.Describe("[sig-mco][Suite:openshift/machine-config-operator/disruptive][Serial][Disruptive][OCPFeatureGate:ManagedBootImagesCPMS] MCO ControlPlaneMachineSet", func() { + defer g.GinkgoRecover() + + var ( + oc = exutil.NewCLI("mco-controlplanemachineset", exutil.KubeConfigPath()) + // Common variables + cpms *ControlPlaneMachineSet + machines []Machine + machineConfiguration *MachineConfiguration + ) + + g.JustBeforeEach(func() { + // Skip if single node + 
SkipIfSNO(oc.AsAdmin()) + // Skip if no machineset + skipTestIfWorkersCannotBeScaled(oc.AsAdmin()) + // ControlPlaneMachineSet Bootimages Update functionality is only available in GCP, AWS, and Azure (Tech Preview) + skipTestIfSupportedPlatformNotMatched(oc, GCPPlatform, AWSPlatform, AzurePlatform) + // Skip if ManagedBootImagesCPMS feature gate is not enabled + SkipIfNoFeatureGate(oc.AsAdmin(), "ManagedBootImagesCPMS") + + PreChecks(oc) + + failureHandler := func(message string, callerSkip ...int) { + logger.Errorf("Gomega assertion failed!") + logger.Errorf("Failure message: %s", message) + + // debug the machinesets + oc.AsAdmin().Run("get", "-n", MachineAPINamespace, "machine.m", "-owide").Execute() + + // We are adding an extra level to the stack here. + // We adjust it so that the assertions can point to the right line of code + // What we do with the callerSkip is similar to configuring all assertions with Offset(1) (increasing offset by one) + if len(callerSkip) == 0 { + callerSkip = []int{1} // default offset should be 1 with this failureHandler wrapper + } + + // Increment the first value to account for this wrapper (increase the offset) + callerSkip[0]++ + + // Fail executing ginkgo failhandler + g.Fail(message, callerSkip...) + } + + o.RegisterFailHandler(failureHandler) + + cpms = NewControlPlaneMachineSet(oc.AsAdmin(), MachineAPINamespace, ControlPlaneMachineSetName) + if !cpms.Exists() { + g.Skip(` "cluster" ControlPlaneMachineset does not exist`) + } + + machines = cpms.GetMachinesOrFail() + machineConfiguration = GetMachineConfiguration(oc.AsAdmin()) + }) + + g.JustAfterEach(func() { + o.RegisterFailHandler(g.Fail) + }) + + // AI-assisted: This test case validates that marketplace boot images are correctly handled and NOT updated + g.It("[PolarionID:85478][OTP] ControlPlaneMachineSets. 
Correctly handle marketplace boot-images [apigroup:machineconfiguration.openshift.io]", func() { + // After talking with devs this test case doesn't make sense in Azure. + // In Azure we shouldn't be allowed to manipulate the values to set invalid values, and we will always update legacy images. Hence, we skip this test case. + skipTestIfSupportedPlatformNotMatched(oc, GCPPlatform, AWSPlatform) + + var ( + fakeImageName = "fake-image" // non-updateable marketplace image + userDataJSONVersionPath = `ignition.version` + ) + + o.Expect(machines).To(o.HaveLen(3), "Unexpected number of control plane machines") + + exutil.By("Backup the original ControlPlaneMachineSet spec for restoration") + originalCPMSSpec := cpms.GetSpecOrFail() + defer cpms.SetSpec(originalCPMSSpec) + logger.Infof("OK!\n") + + exutil.By("Set a 2.2.0 user-data secret in the ControlPlaneMachineSet") + logger.Infof("Getting the user-data secret and backing up its content") + userDataSecret, err := cpms.GetUserDataSecret() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting user-data secret from %s", cpms) + + // Backup original user-data content to restore later + originalUserData, err := userDataSecret.GetDataValue("userData") + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting userData from secret %s", userDataSecret) + defer userDataSecret.SetDataValue("userData", originalUserData) + + logger.Infof("Converting user-data to version 2.2.0") + convertedUserData, err := convertUserDataToNewVersion(originalUserData, "2.2.0") + o.Expect(err).NotTo(o.HaveOccurred(), "Error converting userData to version 2.2.0") + + logger.Infof("Updating the user-data secret with version 2.2.0") + o.Expect(userDataSecret.SetDataValue("userData", convertedUserData)).To(o.Succeed(), + "Error setting userData in secret %s", userDataSecret) + logger.Infof("OK!\n") + + exutil.By("Set a fake/non-updateable boot image in the ControlPlaneMachineSet") + o.Expect(cpms.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(), "Error 
setting a fake boot image in %s", cpms) + logger.Infof("OK!\n") + + exutil.By("Opt-in boot images update with All mode") + defer machineConfiguration.SetSpec(machineConfiguration.GetSpecOrFail()) + o.Expect( + machineConfiguration.SetAllManagedBootImagesConfig(ControlPlaneMachineSetResource), + ).To(o.Succeed(), "Error configuring All managedBootImages in the 'cluster' MachineConfiguration resource") + logger.Infof("OK!\n") + + exutil.By("Check that the bootimage was NOT updated") + // Marketplace/fake images should not be updated even with All mode configured + o.Consistently(cpms.GetCoreOsBootImage, "5m", "20s").Should(o.ContainSubstring(fakeImageName), + "%s was updated, but it shouldn't be updated for marketplace images", cpms) + logger.Infof("OK!\n") + + exutil.By("Check that the user-data secret was NOT updated") + // User-data should remain at 2.2.0 when boot image cannot be updated + o.Consistently(userDataSecret.GetDataValue, "1m", "15s").WithArguments("userData").Should( + HavePathWithValue(userDataJSONVersionPath, o.Equal("2.2.0")), + "The user-data secret was updated, but it shouldn't be updated for marketplace images") + logger.Infof("OK!\n") + }) + + // AI-assisted: This test case validates that Partial mode is not allowed for ControlPlaneMachineSets + g.It("[PolarionID:85399][OTP] ControlPlaneMachineSets boot-image update. 
Partial mode not allowed [apigroup:machineconfiguration.openshift.io]", func() { + + var ( + expectedError = "Only All or None selection mode is permitted for ControlPlaneMachineSets" + ) + + o.Expect(machines).To(o.HaveLen(3), "Unexpected number of control plane machines") + + exutil.By("Configure MachineConfiguration resource to use Partial mode for ControlPlaneMachineSet resources") + defer machineConfiguration.SetSpec(machineConfiguration.GetSpecOrFail()) + + err := machineConfiguration.SetPartialManagedBootImagesConfig(ControlPlaneMachineSetResource, "test-label", "test-value") + o.Expect(err).To(o.HaveOccurred(), "Expected error when configuring Partial mode for ControlPlaneMachineSets") + o.Expect(err).To(o.BeAssignableToTypeOf(&exutil.ExitError{}), "Unexpected error when configuring controlplanemachineset partial mode in MachineConfiguration") + o.Expect(err.(*exutil.ExitError).StdErr).To(o.ContainSubstring(expectedError), + "Error message does not match expected: %v", err) + logger.Infof("OK!\n") + }) + + // AI-assisted: This test case was created to validate ControlPlaneMachineSet boot-image update with All mode + g.It("[PolarionID:85467][OTP] ControlPlaneMachineSets. 
Bootimage upgrade stub ignition to spec 3 [apigroup:machineconfiguration.openshift.io]", func() { + + var ( + fakeImageName = GetValidUpdateBootImageValue(oc.AsAdmin()) + + userDataJSONVersionPath = `ignition.version` + ) + + o.Expect(machines).To(o.HaveLen(3), "Unexpected number of control plane machines") + + exutil.By("Backup the original ControlPlaneMachineSet spec for restoration") + originalCPMSSpec := cpms.GetSpecOrFail() + defer cpms.SetSpec(originalCPMSSpec) + logger.Infof("OK!\n") + + exutil.By("Opt-in boot images update with All mode") + defer machineConfiguration.SetSpec(machineConfiguration.GetSpecOrFail()) + o.Expect( + machineConfiguration.SetAllManagedBootImagesConfig(ControlPlaneMachineSetResource), + ).To(o.Succeed(), "Error configuring All managedBootImages in the 'cluster' MachineConfiguration resource") + logger.Infof("OK!\n") + + exutil.By("Set a 2.2.0 user-data secret in the ControlPlaneMachineSet") + logger.Infof("Getting the user-data secret and backing up its content") + userDataSecret, err := cpms.GetUserDataSecret() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting user-data secret from %s", cpms) + + // Backup original user-data content to restore later + originalUserData, err := userDataSecret.GetDataValue("userData") + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting userData from secret %s", userDataSecret) + defer userDataSecret.SetDataValue("userData", originalUserData) + + logger.Infof("Converting user-data to version 2.2.0") + convertedUserData, err := convertUserDataToNewVersion(originalUserData, "2.2.0") + o.Expect(err).NotTo(o.HaveOccurred(), "Error converting userData to version 2.2.0") + + logger.Infof("Updating the user-data secret with version 2.2.0") + o.Expect(userDataSecret.SetDataValue("userData", convertedUserData)).To(o.Succeed(), + "Error setting userData in secret %s", userDataSecret) + logger.Infof("OK!\n") + + exutil.By("Set a wrong boot image in the ControlPlaneMachineSet") + 
o.Expect(cpms.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(), "Error setting a fake boot image in %s", cpms) + logger.Infof("OK!\n") + + exutil.By("Check that the user-data secret is updated to the latest ignition version") + o.Eventually(userDataSecret.GetDataValue, "5m", "15s").WithArguments("userData").Should( + HavePathWithValue(userDataJSONVersionPath, o.Equal(IgnitionDefaultVersion)), + "The user-data secret was not updated to the latest ignition version") + + logger.Infof("OK!\n") + + exutil.By("Check that the boot image was updated with the right version") + // Check that it was actually updated + o.Eventually(cpms.GetCoreOsBootImage, "5m", "20s").ShouldNot(o.Or(o.Equal(fakeImageName), o.BeEmpty()), + "%s was NOT updated to use the right boot image", cpms) + // Check that the updated image is the right one + CheckCurrentOSImageIsUpdated(cpms) + logger.Infof("OK!\n") + + exutil.By("Delete one machine and wait for it to be recreated") + if cpms.IsActive() { + // Only delete machine if original userData does NOT have storage or systemd sections + hasStorage := strings.Contains(originalUserData, "storage") + hasSystemd := strings.Contains(originalUserData, "systemd") + + if !hasStorage && !hasSystemd { + DeleteOneMachineAndWaitForRecreation(cpms) + } else { + logger.Infof("Original user-data secret had storage or systemd sections, skipping machine deletion test") + } + } else { + logger.Infof("ControlPlaneMachineSet is not active, skipping machine deletion test") + } + logger.Infof("OK!\n") + }) + + // AI-assisted: This test case validates that boot images and user-data are NOT updated when using Mode: None + g.It("[PolarionID:85479][OTP] ControlPlaneMachineSets. 
Not updated when using Mode: None [apigroup:machineconfiguration.openshift.io]", func() { + + var ( + machineConfiguration = GetMachineConfiguration(oc.AsAdmin()) + fakeImageName = GetValidUpdateBootImageValue(oc.AsAdmin()) + + userDataJSONVersionPath = `ignition.version` + ) + + o.Expect(machines).To(o.HaveLen(3), "Unexpected number of control plane machines") + + exutil.By("Backup the original ControlPlaneMachineSet spec for restoration") + originalCPMSSpec := cpms.GetSpecOrFail() + defer cpms.SetSpec(originalCPMSSpec) + logger.Infof("OK!\n") + + exutil.By("Configure MachineConfiguration resource with mode None for controlplanemachinesets") + defer machineConfiguration.SetSpec(machineConfiguration.GetSpecOrFail()) + o.Expect( + machineConfiguration.SetNoneManagedBootImagesConfig(ControlPlaneMachineSetResource), + ).To(o.Succeed(), "Error configuring None managedBootImages in the 'cluster' MachineConfiguration resource") + logger.Infof("OK!\n") + + exutil.By("Set a 2.2.0 user-data secret in the ControlPlaneMachineSet") + logger.Infof("Getting the user-data secret and backing up its content") + userDataSecret, err := cpms.GetUserDataSecret() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting user-data secret from %s", cpms) + + // Backup original user-data content to restore later + originalUserData, err := userDataSecret.GetDataValue("userData") + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting userData from secret %s", userDataSecret) + defer userDataSecret.SetDataValue("userData", originalUserData) + + logger.Infof("Converting user-data to version 2.2.0") + convertedUserData, err := convertUserDataToNewVersion(originalUserData, "2.2.0") + o.Expect(err).NotTo(o.HaveOccurred(), "Error converting userData to version 2.2.0") + + logger.Infof("Updating the user-data secret with version 2.2.0") + o.Expect(userDataSecret.SetDataValue("userData", convertedUserData)).To(o.Succeed(), + "Error setting userData in secret %s", userDataSecret) + logger.Infof("OK!\n") 
+ + exutil.By("Set a wrong boot image in the ControlPlaneMachineSet") + o.Expect(cpms.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(), "Error setting a fake boot image in %s", cpms) + logger.Infof("OK!\n") + + exutil.By("Check that the boot image was NOT updated") + // With Mode: None, the boot image should remain unchanged (still using the fake image) + o.Consistently(cpms.GetCoreOsBootImage, "3m", "30s").Should(o.Equal(fakeImageName), + "The boot image was unexpectedly updated when Mode: None was configured") + logger.Infof("OK!\n") + + exutil.By("Check that the master-user-data secret was NOT updated and is still using ignition 2.2.0") + // With Mode: None, the user-data should remain at version 2.2.0 + o.Consistently(userDataSecret.GetDataValue, "1m", "20s").WithArguments("userData").Should( + HavePathWithValue(userDataJSONVersionPath, o.Equal("2.2.0")), + "The user-data secret was unexpectedly updated when Mode: None was configured") + logger.Infof("OK!\n") + }) + + // AI-assisted: This test case validates that boot images and user-data are NOT updated when CPMS has an owner reference + g.It("[PolarionID:85480][OTP] ControlPlaneMachineSets. 
Not updated when owner reference [apigroup:machineconfiguration.openshift.io]", func() { + + var ( + fakeImageName = GetValidUpdateBootImageValue(oc.AsAdmin()) + + userDataJSONVersionPath = `ignition.version` + ) + + o.Expect(machines).To(o.HaveLen(3), "Unexpected number of control plane machines") + + exutil.By("Backup the original ControlPlaneMachineSet spec for restoration") + originalCPMSSpec := cpms.GetSpecOrFail() + defer cpms.SetSpec(originalCPMSSpec) + logger.Infof("OK!\n") + + exutil.By("Add owner reference to the ControlPlaneMachineSet resource") + // Backup original ownerReferences to restore later + originalOwnerRefs := cpms.GetOrFail(`{.metadata.ownerReferences}`) + defer func() { + if originalOwnerRefs == "" { + cpms.Patch("json", `[{"op": "remove", "path": "/metadata/ownerReferences"}]`) + } else { + cpms.Patch("json", `[{"op": "replace", "path": "/metadata/ownerReferences", "value": `+originalOwnerRefs+`}]`) + } + }() + + o.Expect( + cpms.Patch("merge", `{"metadata":{"ownerReferences": [{"apiVersion": "fake","blockOwnerDeletion": true,"controller": true,"kind": "fakekind","name": "master","uid": "fake-uuid"}]}}`), + ).To(o.Succeed(), "Error patching %s with a fake owner reference", cpms) + logger.Infof("OK!\n") + + exutil.By("Set a 2.2.0 user-data secret in the ControlPlaneMachineSet") + logger.Infof("Getting the user-data secret and backing up its content") + userDataSecret, err := cpms.GetUserDataSecret() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting user-data secret from %s", cpms) + + // Backup original user-data content to restore later + originalUserData, err := userDataSecret.GetDataValue("userData") + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting userData from secret %s", userDataSecret) + defer userDataSecret.SetDataValue("userData", originalUserData) + + logger.Infof("Converting user-data to version 2.2.0") + convertedUserData, err := convertUserDataToNewVersion(originalUserData, "2.2.0") + 
o.Expect(err).NotTo(o.HaveOccurred(), "Error converting userData to version 2.2.0") + + logger.Infof("Updating the user-data secret with version 2.2.0") + o.Expect(userDataSecret.SetDataValue("userData", convertedUserData)).To(o.Succeed(), + "Error setting userData in secret %s", userDataSecret) + logger.Infof("OK!\n") + + exutil.By("Set a wrong boot image in the ControlPlaneMachineSet") + o.Expect(cpms.SetCoreOsBootImage(fakeImageName)).To(o.Succeed(), "Error setting a fake boot image in %s", cpms) + logger.Infof("OK!\n") + + exutil.By("Configure MachineConfiguration resource with mode All for controlplanemachinesets") + defer machineConfiguration.SetSpec(machineConfiguration.GetSpecOrFail()) + o.Expect( + machineConfiguration.SetAllManagedBootImagesConfig(ControlPlaneMachineSetResource), + ).To(o.Succeed(), "Error configuring All managedBootImages in the 'cluster' MachineConfiguration resource") + logger.Infof("OK!\n") + + exutil.By("Check that the boot image was NOT updated") + // With owner reference, the boot image should remain unchanged even with Mode: All + o.Consistently(cpms.GetCoreOsBootImage, "3m", "30s").Should(o.Equal(fakeImageName), + "The boot image was unexpectedly updated when owner reference was present") + logger.Infof("OK!\n") + + exutil.By("Check that the master-user-data secret was NOT updated and is still using ignition 2.2.0") + // With owner reference, the user-data should remain at version 2.2.0 even with Mode: All + o.Consistently(userDataSecret.GetDataValue, "1m", "20s").WithArguments("userData").Should( + HavePathWithValue(userDataJSONVersionPath, o.Equal("2.2.0")), + "The user-data secret was unexpectedly updated when owner reference was present") + logger.Infof("OK!\n") + }) +}) diff --git a/test/extended-priv/mco_scale.go b/test/extended-priv/mco_scale.go new file mode 100644 index 0000000000..38e1065ab8 --- /dev/null +++ b/test/extended-priv/mco_scale.go @@ -0,0 +1,467 @@ +package extended + +import ( + "fmt" + "io" + "net/http" + 
// extractINIValue returns the value assigned to key inside [section] of an
// INI-style document, or "" when the section or the key cannot be found.
//
// Only the text between the section header and the next section header (or
// the end of the document) is searched. The value is read from the key's own
// line, so an empty assignment ("key =") yields "" instead of spilling over
// into the following line. Surrounding whitespace and one pair of enclosing
// double quotes are removed, so `server = "host"` and `server = host` both
// return `host`. Escape sequences inside quoted values are not interpreted.
//
//nolint:unparam // section is always "Workspace", but kept for flexibility
func extractINIValue(config, section, key string) string {
	// Locate the header of the requested section.
	header := regexp.MustCompile(`(?m)^\[` + regexp.QuoteMeta(section) + `\]`)
	loc := header.FindStringIndex(config)
	if loc == nil {
		return ""
	}

	// Restrict the search to this section: cut at the next header, if any.
	body := config[loc[1]:]
	if next := regexp.MustCompile(`(?m)^\[`).FindStringIndex(body); next != nil {
		body = body[:next[0]]
	}

	// [ \t]* is used instead of \s* on purpose: \s also matches newlines, which
	// would let "key =" capture the content of the next line as its value.
	assignment := regexp.MustCompile(`(?m)^[ \t]*` + regexp.QuoteMeta(key) + `[ \t]*=[ \t]*(.*)$`)
	match := assignment.FindStringSubmatch(body)
	if match == nil {
		return ""
	}

	value := strings.TrimSpace(match[1])
	// Drop one pair of enclosing double quotes so that quoted values can be
	// used directly (for example as a URL or an inventory path).
	if len(value) >= 2 && value[0] == '"' && value[len(value)-1] == '"' {
		value = value[1 : len(value)-1]
	}
	return value
}
resourcePool, + "GOVC_DATACENTER=" + dataCenter, + "GOVC_INSECURE=true", + } + originalEnv = os.Environ() + ) + + // In prow the GOVC_TLS_CA_CERTS is not correctly set and it is making the govc command fail. + // we remove this variable from the environment + var execEnv []string + for _, envVar := range originalEnv { + if strings.HasPrefix(envVar, "GOVC_TLS_CA_CERTS=") { + continue + } + execEnv = append(execEnv, envVar) + } + execEnv = append(execEnv, govcEnv...) + + return execEnv +} + +func getvSphereCredentials(oc *exutil.CLI) (server, dataCenter, dataStore, resourcePool, user, password string, err error) { + var ( + configCM = NewConfigMap(oc.AsAdmin(), "openshift-config", "cloud-provider-config") + credsSecret = NewSecret(oc.AsAdmin(), "kube-system", "vsphere-creds") + ) + config, err := configCM.GetDataValue("config") + if err != nil { + return + } + + // Try to parse as INI format (simple regex-based parsing) + iniParsed := false + if strings.Contains(config, "[Workspace]") { + logger.Infof("%s config info is in ini format. Extracting data", configCM) + server = extractINIValue(config, "Workspace", "server") + dataCenter = extractINIValue(config, "Workspace", "datacenter") + dataStore = extractINIValue(config, "Workspace", "default-datastore") + resourcePool = extractINIValue(config, "Workspace", "resourcepool-path") + if server != "" && dataCenter != "" { + iniParsed = true + } + } + + if !iniParsed { + logger.Infof("%s config info is NOT in ini fomart. 
Trying to extract the information from the infrastructure resource", configCM) + infra := NewResource(oc.AsAdmin(), "infrastructure", "cluster") + var failureDomain string + failureDomain, err = infra.Get(`{.spec.platformSpec.vsphere.failureDomains[0]}`) + if err != nil { + logger.Errorf("Cannot get the failureDomain from the infrastructure resource: %s", err) + return + } + if failureDomain == "" { + logger.Errorf("Failure domain is empty in the infrastructure resource: %s\n%s", err, infra.PrettyString()) + err = fmt.Errorf("Empty failure domain in the infrastructure resource") + return + + } + gserver := gjson.Get(failureDomain, "server") + if gserver.Exists() { + server = gserver.String() + } else { + err = fmt.Errorf("Cannot get the server value from failureDomain\n%s", infra.PrettyString()) + return + } + gdataCenter := gjson.Get(failureDomain, "topology.datacenter") + if gdataCenter.Exists() { + dataCenter = gdataCenter.String() + } else { + err = fmt.Errorf("Cannot get the data center value from failureDomain\n%s", infra.PrettyString()) + return + } + + gdataStore := gjson.Get(failureDomain, "topology.datastore") + if gdataStore.Exists() { + dataStore = gdataStore.String() + } else { + err = fmt.Errorf("Cannot get the data store value from failureDomain\n%s", infra.PrettyString()) + return + } + + gresourcePool := gjson.Get(failureDomain, "topology.resourcePool") + if gresourcePool.Exists() { + resourcePool = gresourcePool.String() + } else { + err = fmt.Errorf("Cannot get the resourcepool value from failureDomain\n%s", infra.PrettyString()) + return + } + } + + decodedData, err := credsSecret.GetDecodedDataMap() + if err != nil { + return + } + + for k, v := range decodedData { + item := v + if strings.Contains(k, "username") { + user = item + } + if strings.Contains(k, "password") { + password = item + } + } + + if user == "" { + logger.Errorf("Empty vsphere user") + err = fmt.Errorf("The vsphere user is empty") + return + } + + if password == "" { + 
logger.Errorf("Empty vsphere password") + err = fmt.Errorf("The vsphere password is empty") + return + } + + return +} + +func getRHCOSImagesInfo(version string) (string, error) { + var ( + err error + resp *http.Response + numRetries = 3 + retryDelay = time.Minute + rhcosURL = fmt.Sprintf("https://raw.githubusercontent.com/openshift/installer/release-%s/data/data/rhcos.json", version) + ) + + if CompareVersions(version, ">=", "4.10") { + rhcosURL = fmt.Sprintf("https://raw.githubusercontent.com/openshift/installer/release-%s/data/data/coreos/rhcos.json", version) + } + + // To mitigate network errors we will retry in case of failure + logger.Infof("Getting rhcos image info from: %s", rhcosURL) + for i := 0; i < numRetries; i++ { + if i > 0 { + logger.Infof("Error while getting the rhcos mages json data: %s.\nWaiting %s and retrying. Num retries: %d", err, retryDelay, i) + time.Sleep(retryDelay) + } + resp, err = http.Get(rhcosURL) + if err == nil { + break + } + } + + if err != nil { + return "", err + } + defer resp.Body.Close() + + // We Read the response body + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(body), nil +} + +func GetRHCOSHandler(platform string) (RHCOSHandler, error) { + switch platform { + case AWSPlatform: + return AWSRHCOSHandler{}, nil + case GCPPlatform: + return GCPRHCOSHandler{}, nil + case VspherePlatform: + return VsphereRHCOSHandler{}, nil + default: + return nil, fmt.Errorf("Platform %s is not supported and cannot get RHCOSHandler", platform) + } +} + +type RHCOSHandler interface { + GetBaseImageFromRHCOSImageInfo(version string, arch architecture.Architecture, region string) (string, error) + GetBaseImageURLFromRHCOSImageInfo(version string, arch architecture.Architecture) (string, error) +} + +type AWSRHCOSHandler struct{} + +func (aws AWSRHCOSHandler) GetBaseImageFromRHCOSImageInfo(version string, arch architecture.Architecture, region string) (string, error) { + var ( + path string + 
stringArch = arch.GNUString() + platform = AWSPlatform + ) + + rhcosImageInfo, err := getRHCOSImagesInfo(version) + if err != nil { + return "", err + } + + if region == "" { + return "", fmt.Errorf("Region cannot have an empty value when we try to get the base image in platform %s", platform) + } + if CompareVersions(version, "<", "4.10") { + path = `amis.` + region + `.hvm` + } else { + path = fmt.Sprintf("architectures.%s.images.%s.regions.%s.image", stringArch, platform, region) + + } + + logger.Infof("Looking for rhcos base image info in path %s", path) + baseImage := gjson.Get(rhcosImageInfo, path) + if !baseImage.Exists() { + logger.Infof("rhcos info:\n%s", rhcosImageInfo) + return "", fmt.Errorf("Could not find the base image for version <%s> in platform <%s> architecture <%s> and region <%s> with path %s", + version, platform, arch, region, path) + } + return baseImage.String(), nil +} + +func (aws AWSRHCOSHandler) GetBaseImageURLFromRHCOSImageInfo(version string, arch architecture.Architecture) (string, error) { + return getBaseImageURLFromRHCOSImageInfo(version, "aws", "vmdk.gz", arch.GNUString()) +} + +type GCPRHCOSHandler struct{} + +func (gcp GCPRHCOSHandler) GetBaseImageFromRHCOSImageInfo(version string, arch architecture.Architecture, region string) (string, error) { + var ( + imagePath string + projectPath string + stringArch = arch.GNUString() + platform = GCPPlatform + ) + + if CompareVersions(version, "==", "4.1") { + return "", fmt.Errorf("There is no image base image supported for platform %s in version %s", platform, version) + } + + rhcosImageInfo, err := getRHCOSImagesInfo(version) + if err != nil { + return "", err + } + + if CompareVersions(version, "<", "4.10") { + imagePath = "gcp.image" + projectPath = "gcp.project" + } else { + imagePath = fmt.Sprintf("architectures.%s.images.%s.name", stringArch, platform) + projectPath = fmt.Sprintf("architectures.%s.images.%s.project", stringArch, platform) + } + + logger.Infof("Looking for rhcos 
base image name in path %s", imagePath) + baseImage := gjson.Get(rhcosImageInfo, imagePath) + if !baseImage.Exists() { + logger.Infof("rhcos info:\n%s", rhcosImageInfo) + return "", fmt.Errorf("Could not find the base image for version <%s> in platform <%s> architecture <%s> and region <%s> with path %s", + version, platform, arch, region, imagePath) + } + + logger.Infof("Looking for rhcos base image project in path %s", projectPath) + project := gjson.Get(rhcosImageInfo, projectPath) + if !project.Exists() { + logger.Infof("rhcos info:\n%s", rhcosImageInfo) + return "", fmt.Errorf("Could not find the project where the base image is stored with version <%s> in platform <%s> architecture <%s> and region <%s> with path %s", + version, platform, arch, region, projectPath) + } + + return fmt.Sprintf("projects/%s/global/images/%s", project.String(), baseImage.String()), nil +} + +func (gcp GCPRHCOSHandler) GetBaseImageURLFromRHCOSImageInfo(version string, arch architecture.Architecture) (string, error) { + return getBaseImageURLFromRHCOSImageInfo(version, "gcp", "tar.gz", arch.GNUString()) +} + +type VsphereRHCOSHandler struct{} + +func (vsp VsphereRHCOSHandler) GetBaseImageFromRHCOSImageInfo(version string, arch architecture.Architecture, _ string) (string, error) { + baseImageURL, err := vsp.GetBaseImageURLFromRHCOSImageInfo(version, arch) + if err != nil { + return "", err + } + + return path.Base(baseImageURL), nil +} + +func (vsp VsphereRHCOSHandler) GetBaseImageURLFromRHCOSImageInfo(version string, arch architecture.Architecture) (string, error) { + return getBaseImageURLFromRHCOSImageInfo(version, "vmware", "ova", arch.GNUString()) +} + +func getBaseImageURLFromRHCOSImageInfo(version, platform, format, stringArch string) (string, error) { + var ( + imagePath string + baseURIPath string + olderThan410 = CompareVersions(version, "<", "4.10") + ) + + rhcosImageInfo, err := getRHCOSImagesInfo(version) + if err != nil { + return "", err + } + + if olderThan410 { + 
imagePath = fmt.Sprintf("images.%s.path", platform) + baseURIPath = "baseURI" + } else { + imagePath = fmt.Sprintf("architectures.%s.artifacts.%s.formats.%s.disk.location", stringArch, platform, strings.ReplaceAll(format, ".", `\.`)) + } + + logger.Infof("Looking for rhcos base image path name in path %s", imagePath) + baseImageURL := gjson.Get(rhcosImageInfo, imagePath) + if !baseImageURL.Exists() { + logger.Infof("rhcos info:\n%s", rhcosImageInfo) + return "", fmt.Errorf("Could not find the base image for version <%s> in platform <%s> architecture <%s> and format <%s> with path %s", + version, platform, stringArch, format, imagePath) + } + + if !olderThan410 { + return baseImageURL.String(), nil + } + + logger.Infof("Looking for baseURL in path %s", baseURIPath) + baseURI := gjson.Get(rhcosImageInfo, baseURIPath) + if !baseURI.Exists() { + logger.Infof("rhcos info:\n%s", rhcosImageInfo) + return "", fmt.Errorf("Could not find the base URI with version <%s> in platform <%s> architecture <%s> and format <%s> with path %s", + version, platform, stringArch, format, baseURIPath) + } + + return fmt.Sprintf("%s/%s", strings.Replace(strings.Trim(baseURI.String(), "/"), "releases-art-rhcos.svc.ci.openshift.org", "rhcos.mirror.openshift.com", 1), strings.Trim(baseImageURL.String(), "/")), nil +} + +func uploadBaseImageToCloud(oc *exutil.CLI, platform, baseImageURL, baseImage string) error { + + switch platform { + case AWSPlatform: + logger.Infof("No need to updload images in AWS") + return nil + case GCPPlatform: + logger.Infof("No need to updload images in GCP") + return nil + case VspherePlatform: + server, dataCenter, dataStore, resourcePool, user, password, err := getvSphereCredentials(oc.AsAdmin()) + if err != nil { + return err + } + + err = uploadBaseImageToVsphere(baseImageURL, baseImage, server, dataCenter, dataStore, resourcePool, user, password) + if err != nil { + return err + } + + return nil + default: + return fmt.Errorf("Platform %s is not supported, base 
image cannot be updloaded", platform) + } +} + +func uploadBaseImageToVsphere(baseImageSrc, baseImageDest, server, dataCenter, dataStore, resourcePool, user, password string) error { + var ( + execBin = "govc" + uploadCommand = []string{"import.ova", "--debug", "--name", baseImageDest, baseImageSrc} + upgradeHWCommand = []string{"vm.upgrade", "-vm", baseImageDest} + templateCommand = []string{"vm.markastemplate", baseImageDest} + govcExecEnv = getGovcEnv(server, dataCenter, dataStore, resourcePool, user, password) + ) + + logger.Infof("Uploading base image %s to vsphere with name %s", baseImageSrc, baseImageDest) + logger.Infof("%s %s", execBin, uploadCommand) + + uploadCmd := exec.Command(execBin, uploadCommand...) + uploadCmd.Env = govcExecEnv + + out, err := uploadCmd.CombinedOutput() + logger.Infof(string(out)) + if err != nil { + if strings.Contains(string(out), "already exists") { + logger.Infof("Image %s already exists in the cloud, we don't upload it again", baseImageDest) + } else { + return err + } + } + + logger.Infof("Upgrading VM's hardware") + logger.Infof("%s %s", execBin, upgradeHWCommand) + + upgradeCmd := exec.Command(execBin, upgradeHWCommand...) + upgradeCmd.Env = govcExecEnv + + out, err = upgradeCmd.CombinedOutput() + logger.Infof(string(out)) + if err != nil { + // We don't fail. We log a warning and continue. + logger.Warnf("ERROR UPGRADING HARDWARE: %s", err) + } + + logger.Infof("Transforming VM into template") + logger.Infof("%s %s", execBin, templateCommand) + + templateCmd := exec.Command(execBin, templateCommand...) + templateCmd.Env = govcExecEnv + + out, err = templateCmd.CombinedOutput() + logger.Infof(string(out)) + if err != nil { + // We don't fail. We log a warning and continue. 
+ logger.Warnf("ERROR CONVERTING INTO TEMPLATE: %s", err) + } + + return nil +} diff --git a/test/extended-priv/node.go b/test/extended-priv/node.go index 4f8e63bbb7..85a0d1ed7b 100644 --- a/test/extended-priv/node.go +++ b/test/extended-priv/node.go @@ -12,8 +12,10 @@ import ( expect "github.com/google/goexpect" exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" + "github.com/openshift/machine-config-operator/test/extended-priv/util/architecture" logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" o "github.com/onsi/gomega" @@ -210,7 +212,10 @@ func (n *Node) GetCurrentBootOSImage() (string, error) { // RestoreDesiredConfig changes the value of the desiredConfig annotation to equal the value of currentConfig. desiredConfig=currentConfig. func (n *Node) RestoreDesiredConfig() error { - currentConfig := n.GetCurrentMachineConfig() + currentConfig, err := n.GetCurrentMachineConfig() + if err != nil { + return err + } if currentConfig == "" { return fmt.Errorf("currentConfig annotation has an empty value in node %s", n.GetName()) } @@ -225,8 +230,8 @@ func (n *Node) RestoreDesiredConfig() error { } // GetCurrentMachineConfig returns the ID of the current machine config used in the node -func (n *Node) GetCurrentMachineConfig() string { - return n.GetOrFail(`{.metadata.annotations.machineconfiguration\.openshift\.io/currentConfig}`) +func (n *Node) GetCurrentMachineConfig() (string, error) { + return n.Get(`{.metadata.annotations.machineconfiguration\.openshift\.io/currentConfig}`) } // GetCurrentImage returns the current image used in this node @@ -235,13 +240,13 @@ func (n *Node) GetCurrentImage() string { } // GetDesiredMachineConfig returns the ID of the machine config that we want the node to use -func (n *Node) GetDesiredMachineConfig() string { - return 
n.GetOrFail(`{.metadata.annotations.machineconfiguration\.openshift\.io/desiredConfig}`) +func (n *Node) GetDesiredMachineConfig() (string, error) { + return n.Get(`{.metadata.annotations.machineconfiguration\.openshift\.io/desiredConfig}`) } // GetMachineConfigState returns the State of machineconfiguration process -func (n *Node) GetMachineConfigState() string { - return n.GetOrFail(`{.metadata.annotations.machineconfiguration\.openshift\.io/state}`) +func (n *Node) GetMachineConfigState() (string, error) { + return n.Get(`{.metadata.annotations.machineconfiguration\.openshift\.io/state}`) } // PatchDesiredConfig patches the desiredConfig annotation with the provided value @@ -270,13 +275,23 @@ func (n *Node) HasBeenDrained() bool { } // IsUpdated returns if the node is pending for machineconfig configuration or it is up to date -func (n *Node) IsUpdated() bool { - return (n.GetCurrentMachineConfig() == n.GetDesiredMachineConfig()) && (n.GetMachineConfigState() == "Done") -} +func (n *Node) IsUpdated() (bool, error) { + currentConfig, err := n.GetCurrentMachineConfig() + if err != nil { + return false, err + } -// IsReady returns if the node is in Ready condition -func (n *Node) IsReady() bool { - return n.IsConditionStatusTrue("Ready") + desiredConfig, err := n.GetDesiredMachineConfig() + if err != nil { + return false, err + } + + state, err := n.GetMachineConfigState() + if err != nil { + return false, err + } + + return (currentConfig == desiredConfig) && (state == "Done"), nil } // IsTainted returns if the node hast taints or not @@ -316,6 +331,11 @@ func (n *Node) IsEdge() (bool, error) { return true, nil } +// IsReady returns if the node is in Ready condition +func (n *Node) IsReady() bool { + return n.IsConditionStatusTrue("Ready") +} + // GetMCDaemonLogs returns the logs of the MachineConfig daemonset pod for this node. 
The logs will be grepped using the 'filter' parameter func (n *Node) GetMCDaemonLogs(filter string) (string, error) { var ( @@ -330,150 +350,6 @@ func (n *Node) GetMCDaemonLogs(filter string) (string, error) { return mcdLogs, err } -// CopyFromLocal copies a local file to the node -func (n *Node) CopyFromLocal(from, to string) error { - immediate := true - waitErr := wait.PollUntilContextTimeout(context.TODO(), 1*time.Minute, 5*time.Minute, immediate, func(_ context.Context) (bool, error) { - kubeletReady := n.IsReady() - if kubeletReady { - return true, nil - } - logger.Warnf("Kubelet is not ready in %s. To copy the file to the node we need to wait for kubelet to be ready. Waiting...", n) - return false, nil - }) - - if waitErr != nil { - logger.Errorf("Cannot copy file %s to %s in node %s because Kubelet is not ready in this node", from, to, n) - return waitErr - } - - return n.oc.Run("adm").Args("copy-to-node", "node/"+n.GetName(), fmt.Sprintf("--copy=%s=%s", from, to)).Execute() -} - -// CopyToLocal Copy a file or directory in the node to a local path -func (n *Node) CopyToLocal(from, to string) error { - logger.Infof("Node: %s. 
Copying file %s to local path %s", - n.GetName(), from, to) - mcDaemonName := n.GetMachineConfigDaemon() - fromDaemon := filepath.Join("/rootfs", from) - - return n.oc.Run("cp").Args("-n", MachineConfigNamespace, mcDaemonName+":"+fromDaemon, to, "-c", MachineConfigDaemon).Execute() -} - -// GetPool returns the only pool owning this node -func (n *Node) GetPrimaryPool() (*MachineConfigPool, error) { - allMCPs, err := NewMachineConfigPoolList(n.oc).GetAll() - if err != nil { - return nil, err - } - - var primaryPool *MachineConfigPool - for _, item := range allMCPs { - pool := item - allNodes, err := pool.getSelectedNodes("") - if err != nil { - return nil, err - } - - for _, node := range allNodes { - if node.GetName() != n.GetName() { - continue - } - - // We use short circuit evaluation to set the primary pool: - // - If the pool is master, it will be the primary pool; - // - If the primary pool is nil (not set yet), we set the primary pool (either worker or custom); - // - If the primary pool is not nil, we overwrite it only if the primary pool is a worker. - if pool.IsMaster() || primaryPool == nil || primaryPool.IsWorker() { - primaryPool = &pool - } else if pool.IsCustom() && primaryPool != nil && primaryPool.IsCustom() { - // Error condition: the node belongs to 2 custom pools - return nil, fmt.Errorf("Forbidden configuration. 
The node %s belongs to 2 custom pools: %s and %s", - node.GetName(), primaryPool.GetName(), pool.GetName()) - } - } - } - - return primaryPool, nil -} - -// GetMachineConfigNode returns the MachineConfigNode resource linked to this node -func (n *Node) GetMachineConfigNode() *MachineConfigNode { - return NewMachineConfigNode(n.oc.AsAdmin(), n.GetName()) -} - -// GetAll returns a []Node list with all existing nodes -func (nl *NodeList) GetAll() ([]Node, error) { - allNodeResources, err := nl.ResourceList.GetAll() - if err != nil { - return nil, err - } - allNodes := make([]Node, 0, len(allNodeResources)) - - for _, nodeRes := range allNodeResources { - allNodes = append(allNodes, *NewNode(nl.oc, nodeRes.name)) - } - - return allNodes, nil -} - -// GetAllReady returns a []Node list with all ready nodes -func (nl *NodeList) GetAllReady() ([]Node, error) { - allNodes, err := nl.GetAll() - if err != nil { - return nil, err - } - - readyNodes := make([]Node, 0) - for _, node := range allNodes { - if node.IsReady() { - readyNodes = append(readyNodes, node) - } - } - - return readyNodes, nil -} - -// quietSetNamespacePrivileged invokes exutil.SetNamespacePrivileged but disable the logs output to avoid noise in the logs -func quietSetNamespacePrivileged(oc *exutil.CLI, namespace string) error { - oc.NotShowInfo() - defer oc.SetShowInfo() - - logger.Debugf("Setting namespace %s as privileged", namespace) - return exutil.SetNamespacePrivileged(oc, namespace) -} - -// quietRecoverNamespaceRestricted invokes exutil.RecoverNamespaceRestricted but disable the logs output to avoid noise in the logs -func quietRecoverNamespaceRestricted(oc *exutil.CLI, namespace string) error { - oc.NotShowInfo() - defer oc.SetShowInfo() - - logger.Debugf("Recovering namespace %s from privileged", namespace) - return exutil.RecoverNamespaceRestricted(oc, namespace) -} - -// GetOperatorNode returns the node running the MCO operator pod -func GetOperatorNode(oc *exutil.CLI) (*Node, error) { - 
podsList := NewNamespacedResourceList(oc.AsAdmin(), "pods", MachineConfigNamespace) - podsList.ByLabel("k8s-app=machine-config-operator") - - mcoPods, err := podsList.GetAll() - if err != nil { - return nil, err - } - - if len(mcoPods) != 1 { - return nil, fmt.Errorf("There should be 1 and only 1 MCO operator pod. Found operator pods: %s", mcoPods) - } - - nodeName, err := mcoPods[0].Get(`{.spec.nodeName}`) - if err != nil { - return nil, err - } - - return NewNode(oc, nodeName), nil -} - // GetDateOrFail executes GetDate and fails the test if there is an error func (n *Node) GetDateOrFail() time.Time { date, err := n.GetDate() @@ -639,3 +515,188 @@ func (n *Node) GetRHELVersion() (string, error) { logger.Infof("Node %s RHEL_VERSION %s", n.GetName(), rhelVersion) return rhelVersion, nil } + +// GetPool returns the only pool owning this node +func (n *Node) GetPrimaryPool() (*MachineConfigPool, error) { + allMCPs, err := NewMachineConfigPoolList(n.oc).GetAll() + if err != nil { + return nil, err + } + + var primaryPool *MachineConfigPool + for _, item := range allMCPs { + pool := item + allNodes, err := pool.getSelectedNodes("") + if err != nil { + return nil, err + } + + for _, node := range allNodes { + if node.GetName() != n.GetName() { + continue + } + + // We use short circuit evaluation to set the primary pool: + // - If the pool is master, it will be the primary pool; + // - If the primary pool is nil (not set yet), we set the primary pool (either worker or custom); + // - If the primary pool is not nil, we overwrite it only if the primary pool is a worker. + if pool.IsMaster() || primaryPool == nil || primaryPool.IsWorker() { + primaryPool = &pool + } else if pool.IsCustom() && primaryPool != nil && primaryPool.IsCustom() { + // Error condition: the node belongs to 2 custom pools + return nil, fmt.Errorf("Forbidden configuration. 
The node %s belongs to 2 custom pools: %s and %s", + node.GetName(), primaryPool.GetName(), pool.GetName()) + } + } + } + + if primaryPool == nil { + return nil, fmt.Errorf("Could not find the primary pool for %s", n) + } + + return primaryPool, nil +} + +// GetArchitecture returns the architecture of the node +func (n *Node) GetArchitecture() (architecture.Architecture, error) { + arch, err := n.Get(`{.status.nodeInfo.architecture}`) + if err != nil { + return architecture.UNKNOWN, err + } + return architecture.FromString(arch), nil +} + +// GetArchitectureOrFail get the architecture used in the node and fail the test if any error happens while doing it +func (n *Node) GetArchitectureOrFail() architecture.Architecture { + arch, err := n.GetArchitecture() + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting the architecture of node %s", n.GetName()) + + return arch +} + +// GetMachineConfigNode returns the MachineConfigNode resource linked to this node +func (n *Node) GetMachineConfigNode() *MachineConfigNode { + return NewMachineConfigNode(n.oc.AsAdmin(), n.GetName()) +} + +// GetAll returns a []Node list with all existing nodes +func (nl *NodeList) GetAll() ([]Node, error) { + allNodeResources, err := nl.ResourceList.GetAll() + if err != nil { + return nil, err + } + allNodes := make([]Node, 0, len(allNodeResources)) + + for _, nodeRes := range allNodeResources { + allNodes = append(allNodes, *NewNode(nl.oc, nodeRes.name)) + } + + return allNodes, nil +} + +// GetAllReady returns a []Node list with all ready nodes +func (nl *NodeList) GetAllReady() ([]Node, error) { + allNodes, err := nl.GetAll() + if err != nil { + return nil, err + } + + readyNodes := make([]Node, 0) + for _, node := range allNodes { + if node.IsReady() { + readyNodes = append(readyNodes, node) + } + } + + return readyNodes, nil +} + +// quietSetNamespacePrivileged invokes exutil.SetNamespacePrivileged but disable the logs output to avoid noise in the logs +func quietSetNamespacePrivileged(oc 
*exutil.CLI, namespace string) error { + oc.NotShowInfo() + defer oc.SetShowInfo() + + logger.Debugf("Setting namespace %s as privileged", namespace) + return exutil.SetNamespacePrivileged(oc, namespace) +} + +// quietRecoverNamespaceRestricted invokes exutil.RecoverNamespaceRestricted but disable the logs output to avoid noise in the logs +func quietRecoverNamespaceRestricted(oc *exutil.CLI, namespace string) error { + oc.NotShowInfo() + defer oc.SetShowInfo() + + logger.Debugf("Recovering namespace %s from privileged", namespace) + return exutil.RecoverNamespaceRestricted(oc, namespace) +} + +// GetOperatorNode returns the node running the MCO operator pod +func GetOperatorNode(oc *exutil.CLI) (*Node, error) { + podsList := NewNamespacedResourceList(oc.AsAdmin(), "pods", MachineConfigNamespace) + podsList.ByLabel("k8s-app=machine-config-operator") + + mcoPods, err := podsList.GetAll() + if err != nil { + return nil, err + } + + if len(mcoPods) != 1 { + return nil, fmt.Errorf("There should be 1 and only 1 MCO operator pod. Found operator pods: %s", mcoPods) + } + + nodeName, err := mcoPods[0].Get(`{.spec.nodeName}`) + if err != nil { + return nil, err + } + + return NewNode(oc, nodeName), nil +} + +// CopyFromLocal copies a local file to the node +func (n *Node) CopyFromLocal(from, to string) error { + immediate := true + waitErr := wait.PollUntilContextTimeout(context.TODO(), 1*time.Minute, 5*time.Minute, immediate, func(_ context.Context) (bool, error) { + kubeletReady := n.IsReady() + if kubeletReady { + return true, nil + } + logger.Warnf("Kubelet is not ready in %s. To copy the file to the node we need to wait for kubelet to be ready. 
Waiting...", n) + return false, nil + }) + + if waitErr != nil { + logger.Errorf("Cannot copy file %s to %s in node %s because Kubelet is not ready in this node", from, to, n) + return waitErr + } + + return n.oc.Run("adm").Args("copy-to-node", "node/"+n.GetName(), fmt.Sprintf("--copy=%s=%s", from, to)).Execute() +} + +// CopyToLocal Copy a file or directory in the node to a local path +func (n *Node) CopyToLocal(from, to string) error { + logger.Infof("Node: %s. Copying file %s to local path %s", + n.GetName(), from, to) + mcDaemonName := n.GetMachineConfigDaemon() + fromDaemon := filepath.Join("/rootfs", from) + + return n.oc.Run("cp").Args("-n", MachineConfigNamespace, mcDaemonName+":"+fromDaemon, to, "-c", MachineConfigDaemon).Execute() +} + +// Returns the set of ready nodes in the cluster +func getReadyNodes(oc *exutil.CLI) (sets.Set[string], error) { + nodeList := NewResourceList(oc.AsAdmin(), "nodes") + nodes, err := nodeList.GetAll() + if err != nil { + return nil, err + } + + nodeSet := sets.New[string]() + for _, node := range nodes { + node.oc.NotShowInfo() + isReady, err := node.Get(`{.status.conditions[?(@.type=="Ready")].status}`) + if err == nil && isReady == TrueString { + nodeSet.Insert(node.name) + } + node.oc.SetShowInfo() + } + return nodeSet, nil +} diff --git a/test/extended-priv/secret.go b/test/extended-priv/secret.go index 59eea83caa..308295bf31 100644 --- a/test/extended-priv/secret.go +++ b/test/extended-priv/secret.go @@ -1,6 +1,8 @@ package extended import ( + "encoding/base64" + "encoding/json" "fmt" "os" "path/filepath" @@ -46,6 +48,31 @@ func (s Secret) GetDataValue(key string) (string, error) { return s.oc.AsAdmin().WithoutNamespace().Run("get").Args(s.GetKind(), s.GetName(), "-n", s.GetNamespace(), templateArg).Output() } +// GetDecodedDataMap returns the secret's data as a map with decoded values +func (s Secret) GetDecodedDataMap() (map[string]string, error) { + data := map[string]string{} + dataJSON, err := s.Get(`{.data}`) + 
if err != nil { + return nil, err + } + + if err := json.Unmarshal([]byte(dataJSON), &data); err != nil { + return nil, err + } + + for k, vb64 := range data { + v, err := base64.StdEncoding.DecodeString(vb64) + if err != nil { + logger.Errorf("The certiifcate provided in the kubeconfig is not base64 encoded") + return nil, err + } + // Replace the original encoded value with the decoded value + data[k] = string(v) + } + + return data, nil +} + // SetDataValue sets a key/value to store in the secret func (s Secret) SetDataValue(key, value string) error { // silently set the value so that we don't print the secret in the logs leaking sensible information diff --git a/test/extended-priv/util.go b/test/extended-priv/util.go index 37d9c9ebd0..9930a7f4c5 100644 --- a/test/extended-priv/util.go +++ b/test/extended-priv/util.go @@ -7,11 +7,11 @@ import ( "path/filepath" "regexp" "sort" + "strconv" "strings" "sync" "time" - "github.com/Masterminds/semver/v3" "github.com/google/uuid" exutil "github.com/openshift/machine-config-operator/test/extended-priv/util" logger "github.com/openshift/machine-config-operator/test/extended-priv/util/logext" @@ -468,33 +468,152 @@ func OrFail[T any](vals ...any) T { return vals[0].(T) } -// skipTestIfRHELVersion skips the test if the RHEL version matches the constraint -func skipTestIfRHELVersion(node Node, operator, constraintVersion string) { - actualVersion, err := node.GetRHELVersion() - o.Expect(err).NotTo(o.HaveOccurred(), "Error getting RHEL version from node %s", node.GetName()) +// QuoteIfNotJSON quotes a string if it's not valid JSON +func QuoteIfNotJSON(s string) string { + var js interface{} + if json.Unmarshal([]byte(s), &js) == nil { + // It's valid JSON → return as is + return s + } + // Not valid JSON → return quoted JSON string + b, err := json.Marshal(s) + if err != nil { + e2e.Failf("The provided string cannot be JSON encoded: %s", s) + } + return string(b) +} + +// IsSNO returns true if the cluster is a SNO cluster 
+func IsSNO(oc *exutil.CLI) bool { + allNodes, err := NewNodeList(oc.AsAdmin()).GetAll() + if err != nil { + return false + } + return len(allNodes) == 1 +} + +// SkipIfSNO skips the test case if the cluster is a SNO cluster +func SkipIfSNO(oc *exutil.CLI) { + if IsSNO(oc) { + g.Skip("There is only 1 node in the cluster. This test is not supported in SNO clusters") + } +} + +// WorkersCanBeScaled returns true if worker nodes can be scaled using machinesets +func WorkersCanBeScaled(oc *exutil.CLI) (bool, error) { + platform := exutil.CheckPlatform(oc) + logger.Infof("Checking if in this cluster workers can be scaled using machinesets") + + // Baremetal and None platforms cannot scale workers + if platform == "baremetal" || platform == "none" || platform == "" { + logger.Infof("Baremetal/None platform. Can't scale up nodes in Baremetal test environments. Nodes cannot be scaled") + return false, nil + } + + // Check if MachineAPI capability is enabled + if !IsCapabilityEnabled(oc.AsAdmin(), "MachineAPI") { + logger.Infof("MachineAPI capability is disabled. Nodes cannot be scaled") + return false, nil + } + + // Get all machinesets + msl, err := NewMachineSetList(oc.AsAdmin(), MachineAPINamespace).GetAll() + if err != nil { + logger.Errorf("Error getting a list of MachineSet resources") + return false, err + } + + // If there is no machineset then clearly we can't use them to scale the workers + if len(msl) == 0 { + logger.Infof("No machineset configured. 
Nodes cannot be scaled") + return false, nil + } + + totalworkers := 0 + for _, ms := range msl { + replicas, err := ms.Get(`{.spec.replicas}`) + if err != nil { + logger.Errorf("Error getting the number of replicas in %s", ms) + return false, err + } + if replicas != "" { + intReplicas, err := strconv.Atoi(replicas) + if err == nil { + totalworkers += intReplicas + } + } + } + + // In some UPI/SNO/Compact clusters machineset resources exist, but they are all configured with 0 replicas + // If all machinesets have 0 replicas, then it means that we need to skip the test case + if totalworkers == 0 { + logger.Infof("All machinesets have 0 worker nodes. Nodes cannot be scaled") + return false, nil + } + + return true, nil +} + +// skipTestIfWorkersCannotBeScaled skips the current test if the worker pool cannot be scaled via machineset +func skipTestIfWorkersCannotBeScaled(oc *exutil.CLI) { + canBeScaled, err := WorkersCanBeScaled(oc) + o.ExpectWithOffset(1, err).NotTo(o.HaveOccurred(), "Error deciding if worker nodes can be scaled using machinesets") - // Pad version to semantic version format if needed (e.g., "9.6" -> "9.6.0") - parts := strings.Split(actualVersion, ".") - for len(parts) < 3 { - parts = append(parts, "0") + if !canBeScaled { + g.Skip("Worker nodes cannot be scaled using machinesets. 
This test cannot be execute if workers cannot be scaled via machineset, IPI clusters.") } - paddedVersion := strings.Join(parts, ".") +} - // Pad constraint version as well - constraintParts := strings.Split(constraintVersion, ".") - for len(constraintParts) < 3 { - constraintParts = append(constraintParts, "0") +// getEnabledFeatureGates returns the list of enabled feature gates +func getEnabledFeatureGates(oc *exutil.CLI) ([]string, error) { + enabledFeatureGates, err := NewResource(oc.AsAdmin(), "featuregate", "cluster").Get(`{.status.featureGates[0].enabled[*].name}`) + if err != nil { + return nil, err } - paddedConstraintVersion := strings.Join(constraintParts, ".") - // Parse versions for comparison - constraint, err := semver.NewConstraint(operator + paddedConstraintVersion) - o.Expect(err).NotTo(o.HaveOccurred(), "Error parsing version constraint") + if strings.TrimSpace(enabledFeatureGates) == "" { + return []string{}, nil + } - actual, err := semver.NewVersion(paddedVersion) - o.Expect(err).NotTo(o.HaveOccurred(), "Error parsing actual version %s (padded from %s)", paddedVersion, actualVersion) + return strings.Split(enabledFeatureGates, " "), nil +} + +// IsFeaturegateEnabled checks whether a featuregate is enabled or not +func IsFeaturegateEnabled(oc *exutil.CLI, featuregate string) (bool, error) { + enabledFeatureGates, err := getEnabledFeatureGates(oc) + if err != nil { + return false, err + } + for _, f := range enabledFeatureGates { + if f == featuregate { + return true, nil + } + } + return false, nil +} + +// SkipIfNoFeatureGate skips the test if the specified feature gate is not enabled +func SkipIfNoFeatureGate(oc *exutil.CLI, featuregate string) { + enabled, err := IsFeaturegateEnabled(oc, featuregate) + o.Expect(err).NotTo(o.HaveOccurred(), "Error getting enabled featuregates") + + if !enabled { + g.Skip(fmt.Sprintf("Featuregate %s is not enabled in this cluster", featuregate)) + } +} + +// skipTestIfSupportedPlatformNotMatched skips the test 
if the current platform is not in the supported list +func skipTestIfSupportedPlatformNotMatched(oc *exutil.CLI, supported ...string) { + var match bool + p := exutil.CheckPlatform(oc) + for _, sp := range supported { + if strings.EqualFold(sp, p) { + match = true + break + } + } - if constraint.Check(actual) { - g.Skip(fmt.Sprintf("Test requires RHEL version NOT %s %s, but node has %s", operator, constraintVersion, actualVersion)) + if !match { + g.Skip(fmt.Sprintf("skip test because current platform %s is not in supported list %v", p, supported)) } } diff --git a/test/extended-priv/util/architecture/architecture.go b/test/extended-priv/util/architecture/architecture.go new file mode 100644 index 0000000000..23cdb1755d --- /dev/null +++ b/test/extended-priv/util/architecture/architecture.go @@ -0,0 +1,84 @@ +package architecture + +import ( + e2e "k8s.io/kubernetes/test/e2e/framework" +) + +type Architecture int + +const ( + AMD64 Architecture = iota + ARM64 + PPC64LE + S390X + MULTI + UNKNOWN +) + +// String constants for architecture names +const ( + amd64String = "amd64" + arm64String = "arm64" + ppc64leString = "ppc64le" + s390xString = "s390x" + multiString = "multi" + x86_64String = "x86_64" + aarch64String = "aarch64" +) + +// FromString returns the Architecture value for the given string +func FromString(arch string) Architecture { + switch arch { + case amd64String: + return AMD64 + case arm64String: + return ARM64 + case ppc64leString: + return PPC64LE + case s390xString: + return S390X + case multiString: + return MULTI + default: + e2e.Failf("Unknown architecture %s", arch) + } + return AMD64 +} + +// String returns the string value for the given Architecture +func (a Architecture) String() string { + switch a { + case AMD64: + return amd64String + case ARM64: + return arm64String + case PPC64LE: + return ppc64leString + case S390X: + return s390xString + case MULTI: + return multiString + default: + e2e.Failf("Unknown architecture %d", a) + } + return 
"" +} + +// GNUString returns the GNU-style architecture string (x86_64, aarch64, etc.) +func (a Architecture) GNUString() string { + switch a { + case AMD64: + return x86_64String + case ARM64: + return aarch64String + case PPC64LE: + return ppc64leString + case S390X: + return s390xString + case MULTI: + return multiString + default: + e2e.Failf("Unknown architecture %d", a) + } + return "" +} diff --git a/test/extended-priv/util/machine_helpers.go b/test/extended-priv/util/machine_helpers.go new file mode 100644 index 0000000000..cb886998c1 --- /dev/null +++ b/test/extended-priv/util/machine_helpers.go @@ -0,0 +1,11 @@ +package util + +import ( + "strings" +) + +// CheckPlatform returns the platform type of the cluster +func CheckPlatform(oc *CLI) string { + output, _ := oc.AsAdmin().WithoutNamespace().Run("get").Args("infrastructure", "cluster", "-o=jsonpath={.status.platformStatus.type}").Output() + return strings.ToLower(output) +} diff --git a/test/extended-priv/versions.go b/test/extended-priv/versions.go new file mode 100644 index 0000000000..cfa98982f7 --- /dev/null +++ b/test/extended-priv/versions.go @@ -0,0 +1,73 @@ +package extended + +import ( + "strconv" + "strings" + + e2e "k8s.io/kubernetes/test/e2e/framework" +) + +var operators = map[string][]int{ + "<": {1}, + ">": {-1}, + "=": {0}, + "==": {0}, + "<=": {0, 1}, + "=<": {0, 1}, + ">=": {0, -1}, + "=>": {0, -1}, +} + +func validateOperator(operator string) { + keys := make([]string, 0, len(operators)) + for k := range operators { + keys = append(keys, k) + if operator == k { + return + } + } + + e2e.Failf("Operator %s not permitted. 
// versionComponentValue returns the numeric value of the i-th component in parts.
//
// A missing component counts as 0. Surrounding whitespace is ignored. If the
// component is not a plain integer, its leading run of digits is used
// (e.g. "12rc1" is 12). If there are no leading digits the value is 0.
func versionComponentValue(parts []string, i int) int {
	if i >= len(parts) {
		return 0
	}

	component := strings.TrimSpace(parts[i])
	// Plain integers are parsed exactly as strconv.Atoi does
	if n, err := strconv.Atoi(component); err == nil {
		return n
	}

	// Not a plain integer: fall back to the leading digits, so that a suffix
	// does not turn the whole component into 0
	end := 0
	for end < len(component) && component[end] >= '0' && component[end] <= '9' {
		end++
	}
	n, err := strconv.Atoi(component[:end])
	if err != nil {
		// No leading digits (or a value too big for an int): count it as 0
		return 0
	}
	return n
}

// compareVer compares two dot-separated versions component by component.
// Missing components count as 0, so "3" and "3.0" are equal.
//
// NOTE: the sign of the result is inverted with respect to the usual
// comparator convention:
//   - -1 means that l is greater than r
//   - 1 means that l is lower than r
//   - 0 means that both versions are equal
func compareVer(l, r string) int {
	ls := strings.Split(l, ".")
	rs := strings.Split(r, ".")

	maxlen := len(ls)
	if len(rs) > maxlen {
		maxlen = len(rs)
	}

	for i := 0; i < maxlen; i++ {
		li := versionComponentValue(ls, i)
		ri := versionComponentValue(rs, i)
		switch {
		case li > ri:
			return -1
		case li < ri:
			return 1
		}
	}

	return 0
}