diff --git a/control-plane-operator/controllers/hostedcontrolplane/nto/clusternodetuningoperator.go b/control-plane-operator/controllers/hostedcontrolplane/nto/clusternodetuningoperator.go index 4c86b1ad38c..c4dc8756360 100644 --- a/control-plane-operator/controllers/hostedcontrolplane/nto/clusternodetuningoperator.go +++ b/control-plane-operator/controllers/hostedcontrolplane/nto/clusternodetuningoperator.go @@ -146,7 +146,7 @@ func ReconcileDeployment(dep *appsv1.Deployment, params Params) error { } ntoArgs := []string{ - "-v=2", + "-v=0", } var ntoEnv []corev1.EnvVar diff --git a/docs/content/how-to/node-tuning.md b/docs/content/how-to/node-tuning.md index 4264909f814..2e9f41d4b84 100644 --- a/docs/content/how-to/node-tuning.md +++ b/docs/content/how-to/node-tuning.md @@ -1,6 +1,7 @@ # Manage node-level tuning with the Node Tuning Operator -If you would like to set some node-level tuning on the nodes in your hosted cluster, you can use the [Node Tuning Operator](https://docs.openshift.com/container-platform/4.11/scalability_and_performance/using-node-tuning-operator.html). In HyperShift, node tuning can be configured by creating ConfigMaps which contain Tuned objects, and referencing these ConfigMaps in your NodePools. Currently Node Tuning is limited to tunables which the TuneD daemon can apply directly like setting `sysctl` values. Tuning that requires setting kernel boot parameters is not yet supported in HyperShift. +## Creating a simple TuneD profile for setting sysctl settings +If you would like to set some node-level tuning on the nodes in your hosted cluster, you can use the [Node Tuning Operator](https://docs.openshift.com/container-platform/latest/scalability_and_performance/using-node-tuning-operator.html). In HyperShift, node tuning can be configured by creating ConfigMaps which contain Tuned objects, and referencing these ConfigMaps in your NodePools. 1. Create a ConfigMap which contains a valid Tuned manifest and reference it in a NodePool. The example Tuned manifest below defines a profile which sets `vm.dirty_ratio` to 55, on Nodes which contain the Node label `tuned-1-node-label` with any value. @@ -84,7 +85,7 @@ If you would like to set some node-level tuning on the nodes in your hosted clus nodepool-1-worker-2 tuned-1-profile True False 7m14s ``` - As we can see, both worker nodes in the nodepool have the tuned-1-profile applied. Note that if no custom profiles are created, the `openshift-node` profile will be applied by default. + As we can see, both worker nodes in the NodePool have the tuned-1-profile applied. Note that if no custom profiles are created, the `openshift-node` profile will be applied by default. 3. To confirm the tuning was applied correctly, we can start a debug shell on a Node and check the sysctl values: @@ -95,4 +96,126 @@ If you would like to set some node-level tuning on the nodes in your hosted clus Example output: ``` vm.dirty_ratio = 55 + ``` + +## Applying tuning which requires kernel boot parameters +You can also use the Node Tuning Operator for more complex tuning which requires setting kernel boot parameters. +As an example, the following steps can be followed to create a NodePool with huge pages reserved. + +1. Create the following ConfigMap which contains a Tuned object manifest for creating 10 hugepages of size 2M. 
+ + Save this ConfigMap manifest in a file called `tuned-hugepages.yaml`: + ``` + apiVersion: v1 + kind: ConfigMap + metadata: + name: tuned-hugepages + namespace: clusters + data: + tuned: | + apiVersion: tuned.openshift.io/v1 + kind: Tuned + metadata: + name: hugepages + namespace: openshift-cluster-node-tuning-operator + spec: + profile: + - data: | + [main] + summary=Boot time configuration for hugepages + include=openshift-node + [bootloader] + cmdline_openshift_node_hugepages=hugepagesz=2M hugepages=50 + name: openshift-node-hugepages + recommend: + - priority: 20 + profile: openshift-node-hugepages + ``` + > **_NOTE:_** The `.spec.recommend.match` field is intentionally left blank. In this case this Tuned will be applied to all Nodes in the NodePool where this ConfigMap is referenced. It is advised to group Nodes with the same hardware configuration into the same NodePool. Not following this practice might result in TuneD operands calculating conflicting kernel parameters for two or more nodes sharing the same NodePool. + + Create the ConfigMap in the management cluster: + ``` + oc --kubeconfig="$MGMT_KUBECONFIG" create -f tuned-hugepages.yaml + ``` + +2. Create a new NodePool manifest YAML file, customize the NodePools upgrade type, and reference the previously created ConfigMap in the `spec.tunedConfig` section before creating it in the management cluster. + + Create the NodePool manifest and save it in a file called `hugepages-nodepool.yaml`: + ``` + NODEPOOL_NAME=hugepages-example + INSTANCE_TYPE=m5.2xlarge + NODEPOOL_REPLICAS=2 + + hypershift create nodepool aws \ + --cluster-name $CLUSTER_NAME \ + --name $NODEPOOL_NAME \ + --node-count $NODEPOOL_REPLICAS \ + --instance-type $INSTANCE_TYPE \ + --render > hugepages-nodepool.yaml + ``` + + Edit `hugepages-nodepool.yaml`. Set `.spec.management.upgradeType` to `InPlace`, and set `.spec.tunedConfig` to reference the `tuned-hugepages` ConfigMap you created. + ``` + apiVersion: hypershift.openshift.io/v1alpha1 + kind: NodePool + metadata: + name: hugepages-nodepool + namespace: clusters + ... + spec: + management: + ... + upgradeType: InPlace + ... + tunedConfig: + - name: tuned-hugepages + ``` + > **_NOTE:_** Setting `.spec.management.upgradeType` to `InPlace` is recommended to avoid unnecessary Node recreations when applying the new MachineConfigs. With the `Replace` upgrade type, Nodes will be fully deleted and new nodes will replace them when applying the new kernel boot parameters that are calculated by the TuneD operand. + + Create the NodePool in the management cluster: + ``` + oc --kubeconfig="$MGMT_KUBECONFIG" create -f hugepages-nodepool.yaml + ``` + + +3. After the Nodes become available, the containerized TuneD daemon will calculate the required kernel boot parameters based on the applied TuneD profile. After the Nodes become `Ready` and reboot once to apply the generated MachineConfig, you can verify that the Tuned profile is applied and that the kernel boot parameters have been set. 
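+
+    Optionally, you can also inspect the MachineConfig that the Node Tuning Operator generated from the TuneD profile. It is mirrored into the hosted control plane namespace on the management cluster as a ConfigMap carrying the `hypershift.openshift.io/nto-generated-machine-config` label. As a rough sketch (the hosted control plane namespace is typically `<hosted cluster namespace>-<hosted cluster name>`, and the exact ConfigMap name is generated by the operator):
+    ```
+    oc --kubeconfig="$MGMT_KUBECONFIG" get configmaps -n clusters-$CLUSTER_NAME -l hypershift.openshift.io/nto-generated-machine-config=true
+    ```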
+ + List the Tuned objects in the hosted cluster: + ``` + oc --kubeconfig="$HC_KUBECONFIG" get Tuneds -n openshift-cluster-node-tuning-operator + ``` + + Example output: + ``` + NAME AGE + default 123m + hugepages-8dfb1fed 1m23s + rendered 123m + ``` + + List the Profiles in the hosted cluster: + ``` + oc --kubeconfig="$HC_KUBECONFIG" get Profiles -n openshift-cluster-node-tuning-operator + ``` + + Example output: + ``` + NAME TUNED APPLIED DEGRADED AGE + nodepool-1-worker-1 openshift-node True False 132m + nodepool-1-worker-2 openshift-node True False 131m + hugepages-nodepool-worker-1 openshift-node-hugepages True False 4m8s + hugepages-nodepool-worker-2 openshift-node-hugepages True False 3m57s + ``` + + Both worker nodes in the new NodePool have the `openshift-node-hugepages` profile applied. + + +4. To confirm the tuning was applied correctly, we can start a debug shell on a Node and check `/proc/cmdline` + ``` + oc --kubeconfig="$HC_KUBECONFIG" debug node/nodepool-1-worker-1 -- chroot /host cat /proc/cmdline + ``` + + Example output: + ``` + BOOT_IMAGE=(hd0,gpt3)/ostree/rhcos-... hugepagesz=2M hugepages=50 ``` \ No newline at end of file diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go index a38c38bf91b..89e606242ba 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go @@ -81,8 +81,9 @@ const ( TokenSecretConfigKey = "config" TokenSecretAnnotation = "hypershift.openshift.io/ignition-config" - tunedConfigKey = "tuned" - tunedConfigMapLabel = "hypershift.openshift.io/tuned-config" + tunedConfigKey = "tuned" + tunedConfigMapLabel = "hypershift.openshift.io/tuned-config" + nodeTuningGeneratedConfigLabel = "hypershift.openshift.io/nto-generated-machine-config" controlPlaneOperatorManagesDecompressAndDecodeConfig = "io.openshift.hypershift.control-plane-operator-manages.decompress-decode-config" ) @@ -938,6 +939,9 @@ func reconcileUserDataSecret(userDataSecret *corev1.Secret, nodePool *hyperv1.No return nil } +// reconcileTunedConfigMap inserts the Tuned object manifest in tunedConfig into ConfigMap tunedConfigMap. +// This is used to mirror the Tuned object manifest into the control plane namespace, for the Node +// Tuning Operator to mirror and reconcile in the hosted cluster. func reconcileTunedConfigMap(tunedConfigMap *corev1.ConfigMap, nodePool *hyperv1.NodePool, tunedConfig string) error { tunedConfigMap.Immutable = k8sutilspointer.BoolPtr(false) if tunedConfigMap.Annotations == nil { @@ -1321,6 +1325,17 @@ func (r *NodePoolReconciler) getConfig(ctx context.Context, configs = append(configs, *configConfigMap) } + // Look for NTO generated MachineConfigs from the hosted control plane namespace + nodeTuningGeneratedConfigs := &corev1.ConfigMapList{} + if err := r.List(ctx, nodeTuningGeneratedConfigs, client.MatchingLabels{ + nodeTuningGeneratedConfigLabel: "true", + hyperv1.NodePoolLabel: nodePool.GetName(), + }, client.InNamespace(controlPlaneResource)); err != nil { + errors = append(errors, err) + } + + configs = append(configs, nodeTuningGeneratedConfigs.Items...) 
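+	// The ConfigMaps appended above are created by the Node Tuning Operator when a TuneD
+	// profile requires kernel boot parameters (e.g. a [bootloader] section): each one wraps
+	// a generated MachineConfig, which is defaulted and validated below together with the
+	// user-provided config referenced by the NodePool.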
+ for _, config := range configs { manifestRaw := config.Data[TokenSecretConfigKey] manifest, err := defaultAndValidateConfigManifest([]byte(manifestRaw)) @@ -1676,6 +1691,20 @@ func (r *NodePoolReconciler) enqueueNodePoolsForConfig(obj client.Object) []reco return enqueueParentNodePool(obj) } + // Check if the ConfigMap is generated by an operator in the control plane namespace + // corresponding to this nodepool. + if _, ok := obj.GetLabels()[nodeTuningGeneratedConfigLabel]; ok { + nodePoolName := obj.GetLabels()[hyperv1.NodePoolLabel] + nodePoolNamespacedName, err := r.getNodePoolNamespacedName(nodePoolName, obj.GetNamespace()) + if err != nil { + return result + } + obj.SetAnnotations(map[string]string{ + nodePoolAnnotation: nodePoolNamespacedName.String(), + }) + return enqueueParentNodePool(obj) + } + // Otherwise reconcile NodePools which are referencing the given ConfigMap. for key := range nodePoolList.Items { reconcileNodePool := false @@ -1706,6 +1735,24 @@ func (r *NodePoolReconciler) enqueueNodePoolsForConfig(obj client.Object) []reco return result } +// getNodePoolNamespace returns the namespaced name of a NodePool, given the NodePools name +// and the control plane namespace name for the hosted cluster that this NodePool is a part of. +func (r *NodePoolReconciler) getNodePoolNamespacedName(nodePoolName string, controlPlaneNamespace string) (types.NamespacedName, error) { + hcpList := &hyperv1.HostedControlPlaneList{} + if err := r.List(context.Background(), hcpList, &client.ListOptions{ + Namespace: controlPlaneNamespace, + }); err != nil || len(hcpList.Items) < 1 { + return types.NamespacedName{Name: nodePoolName}, err + } + hostedCluster, ok := hcpList.Items[0].Annotations[hostedcluster.HostedClusterAnnotation] + if !ok { + return types.NamespacedName{Name: nodePoolName}, fmt.Errorf("failed to get Hosted Cluster name for HostedControlPlane %s", hcpList.Items[0].Name) + } + nodePoolNamespace := supportutil.ParseNamespacedName(hostedCluster).Namespace + + return types.NamespacedName{Name: nodePoolName, Namespace: nodePoolNamespace}, nil +} + func enqueueParentNodePool(obj client.Object) []reconcile.Request { var nodePoolName string if obj.GetAnnotations() != nil { diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go index 207d1548dae..ea28bf6d6ca 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go @@ -13,6 +13,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" api "github.com/openshift/hypershift/api" hyperv1 "github.com/openshift/hypershift/api/v1alpha1" + "github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster" "github.com/openshift/hypershift/hypershift-operator/controllers/manifests" "github.com/openshift/hypershift/support/releaseinfo" "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/dockerv1client" @@ -21,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" k8sutilspointer "k8s.io/utils/pointer" capiaws "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1" @@ -1518,6 +1520,85 @@ func TestGetName(t *testing.T) { g.Expect(alphaNumeric.MatchString(string(name[0]))).To(BeTrue()) } +func TestGetNodePoolNamespacedName(t *testing.T) { + testControlPlaneNamespace := 
"control-plane-ns" + testNodePoolNamespace := "clusters" + testNodePoolName := "nodepool-1" + testCases := []struct { + name string + nodePoolName string + controlPlaneNamespace string + hostedControlPlane *hyperv1.HostedControlPlane + expect string + error bool + }{ + { + name: "gets correct NodePool namespaced name", + nodePoolName: testNodePoolName, + controlPlaneNamespace: testControlPlaneNamespace, + hostedControlPlane: &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testControlPlaneNamespace, + Annotations: map[string]string{ + hostedcluster.HostedClusterAnnotation: types.NamespacedName{Name: "hosted-cluster-1", Namespace: testNodePoolNamespace}.String(), + }, + }, + }, + expect: types.NamespacedName{Name: testNodePoolName, Namespace: testNodePoolNamespace}.String(), + error: false, + }, + { + name: "fails if HostedControlPlane missing HostedClusterAnnotation", + nodePoolName: testNodePoolName, + controlPlaneNamespace: testControlPlaneNamespace, + hostedControlPlane: &hyperv1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: testControlPlaneNamespace, + }, + }, + expect: "", + error: true, + }, + { + name: "fails if HostedControlPlane does not exist", + nodePoolName: testNodePoolName, + controlPlaneNamespace: testControlPlaneNamespace, + hostedControlPlane: nil, + expect: "", + error: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + var r NodePoolReconciler + if tc.hostedControlPlane == nil { + r = NodePoolReconciler{ + Client: fake.NewClientBuilder().WithObjects().Build(), + } + } else { + r = NodePoolReconciler{ + Client: fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(tc.hostedControlPlane).Build(), + } + } + + got, err := r.getNodePoolNamespacedName(testNodePoolName, testControlPlaneNamespace) + + if tc.error { + g.Expect(err).To(HaveOccurred()) + return + } + g.Expect(err).ToNot(HaveOccurred()) + if diff := cmp.Diff(got.String(), tc.expect); diff != "" { + t.Errorf("actual NodePool namespaced name differs from expected: %s", diff) + t.Logf("got: %s \n, expected: \n %s", got, tc.expect) + } + }) + } +} + func TestSetExpirationTimestampOnToken(t *testing.T) { fakeName := "test-token" fakeNamespace := "master-cluster1" diff --git a/test/e2e/nodepool_nto_machineconfig_test.go b/test/e2e/nodepool_nto_machineconfig_test.go new file mode 100644 index 00000000000..1db5881ec42 --- /dev/null +++ b/test/e2e/nodepool_nto_machineconfig_test.go @@ -0,0 +1,280 @@ +//go:build e2e +// +build e2e + +package e2e + +import ( + "context" + _ "embed" + "testing" + "time" + + . 
"github.com/onsi/gomega" + + hyperv1 "github.com/openshift/hypershift/api/v1alpha1" + e2eutil "github.com/openshift/hypershift/test/e2e/util" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + crclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" +) + +const ( + hugepagesTuned = `apiVersion: tuned.openshift.io/v1 +kind: Tuned +metadata: + name: hugepages + namespace: openshift-cluster-node-tuning-operator +spec: + profile: + - data: | + [main] + summary=Boot time configuration for hugepages + include=openshift-node + [bootloader] + cmdline_openshift_node_hugepages=hugepagesz=2M hugepages=4 + name: openshift-hugepages + recommend: + - priority: 20 + profile: openshift-hugepages +` + + hypershiftNodePoolNameLabel = "hypershift.openshift.io/nodePoolName" // HyperShift-enabled NTO adds this label to Tuned CRs bound to NodePools + tunedConfigKey = "tuned" +) + +func TestNTOMachineConfigGetsRolledOut(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + ctx, cancel := context.WithCancel(testContext) + defer cancel() + + client, err := e2eutil.GetClient() + g.Expect(err).NotTo(HaveOccurred(), "failed to get k8s client") + + clusterOpts := globalOpts.DefaultClusterOptions(t) + clusterOpts.ControlPlaneAvailabilityPolicy = string(hyperv1.SingleReplica) + clusterOpts.BeforeApply = func(o crclient.Object) { + nodePool, isNodepool := o.(*hyperv1.NodePool) + if !isNodepool { + return + } + nodePool.Spec.Management.Replace = &hyperv1.ReplaceUpgrade{ + Strategy: hyperv1.UpgradeStrategyRollingUpdate, + RollingUpdate: &hyperv1.RollingUpdate{ + MaxUnavailable: func(v intstr.IntOrString) *intstr.IntOrString { return &v }(intstr.FromInt(0)), + MaxSurge: func(v intstr.IntOrString) *intstr.IntOrString { return &v }(intstr.FromInt(int(*nodePool.Spec.Replicas))), + }, + } + } + + hostedCluster := e2eutil.CreateCluster(t, ctx, client, &clusterOpts, globalOpts.Platform, globalOpts.ArtifactDir) + + // Sanity check the cluster by waiting for the nodes to report ready + t.Logf("Waiting for guest client to become available") + guestClient := e2eutil.WaitForGuestClient(t, testContext, client, hostedCluster) + + // Wait for Nodes to be Ready + numNodes := int32(globalOpts.configurableClusterOptions.NodePoolReplicas * len(clusterOpts.AWSPlatform.Zones)) + e2eutil.WaitForNReadyNodes(t, testContext, guestClient, numNodes, hostedCluster.Spec.Platform.Type) + + // Wait for the rollout to be complete + t.Logf("Waiting for cluster rollout. 
Image: %s", globalOpts.LatestReleaseImage) + e2eutil.WaitForImageRollout(t, testContext, client, guestClient, hostedCluster, globalOpts.LatestReleaseImage) + err = client.Get(testContext, crclient.ObjectKeyFromObject(hostedCluster), hostedCluster) + g.Expect(err).NotTo(HaveOccurred(), "failed to get hostedcluster") + + tunedConfigConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hugepages-tuned-test", + Namespace: hostedCluster.Namespace, + }, + Data: map[string]string{tunedConfigKey: hugepagesTuned}, + } + if err := client.Create(ctx, tunedConfigConfigMap); err != nil { + t.Fatalf("failed to create configmap for custom Tuned object: %v", err) + } + + nodePools := &hyperv1.NodePoolList{} + if err := client.List(ctx, nodePools, crclient.InNamespace(hostedCluster.Namespace)); err != nil { + t.Fatalf("failed to list nodepools in namespace %s: %v", hostedCluster.Namespace, err) + } + + var nodePool hyperv1.NodePool + for _, nodePool = range nodePools.Items { + if nodePool.Spec.ClusterName != hostedCluster.Name { + continue + } + + np := nodePool.DeepCopy() + nodePool.Spec.TunedConfig = append(nodePool.Spec.TunedConfig, corev1.LocalObjectReference{Name: tunedConfigConfigMap.Name}) + if err := client.Patch(ctx, &nodePool, crclient.MergeFrom(np)); err != nil { + t.Fatalf("failed to update nodepool %s after adding Tuned config: %v", nodePool.Name, err) + } + } + + ds := ntoMachineConfigUpdatedVerificationDS.DeepCopy() + if err := guestClient.Create(ctx, ds); err != nil { + t.Fatalf("failed to create %s DaemonSet in guestcluster: %v", ds.Name, err) + } + + t.Logf("waiting for rollout of NodePools with NTO-generated config") + err = wait.PollImmediateWithContext(ctx, 5*time.Second, 15*time.Minute, func(ctx context.Context) (bool, error) { + if ctx.Err() != nil { + return false, err + } + pods := &corev1.PodList{} + if err := guestClient.List(ctx, pods, crclient.InNamespace(ds.Namespace), crclient.MatchingLabels(ds.Spec.Selector.MatchLabels)); err != nil { + t.Logf("WARNING: failed to list pods, will retry: %v", err) + return false, nil + } + nodes := &corev1.NodeList{} + if err := guestClient.List(ctx, nodes); err != nil { + t.Logf("WARNING: failed to list nodes, will retry: %v", err) + return false, nil + } + if len(pods.Items) != len(nodes.Items) { + return false, nil + } + + for _, pod := range pods.Items { + if !isPodReady(&pod) { + return false, nil + } + } + + return true, nil + }) + if err != nil { + t.Fatalf("failed waiting for all pods in the NTO MachineConfig update verification DS to be ready: %v", err) + } + + e2eutil.EnsureNodeCountMatchesNodePoolReplicas(t, testContext, client, guestClient, hostedCluster.Namespace) + e2eutil.EnsureNoCrashingPods(t, ctx, client, hostedCluster) + e2eutil.EnsureAllContainersHavePullPolicyIfNotPresent(t, ctx, client, hostedCluster) + e2eutil.EnsureHCPContainersHaveResourceRequests(t, ctx, client, hostedCluster) + e2eutil.EnsureNoPodsWithTooHighPriority(t, ctx, client, hostedCluster) +} + +func TestNTOMachineConfigAppliedInPlace(t *testing.T) { + t.Parallel() + g := NewWithT(t) + + ctx, cancel := context.WithCancel(testContext) + defer cancel() + + client, err := e2eutil.GetClient() + g.Expect(err).NotTo(HaveOccurred(), "failed to get k8s client") + + clusterOpts := globalOpts.DefaultClusterOptions(t) + clusterOpts.ControlPlaneAvailabilityPolicy = string(hyperv1.SingleReplica) + clusterOpts.BeforeApply = func(o crclient.Object) { + nodePool, isNodepool := o.(*hyperv1.NodePool) + if !isNodepool { + return + } + 
nodePool.Spec.Management.UpgradeType = hyperv1.UpgradeTypeInPlace + } + + hostedCluster := e2eutil.CreateCluster(t, ctx, client, &clusterOpts, globalOpts.Platform, globalOpts.ArtifactDir) + + // Sanity check the cluster by waiting for the nodes to report ready + t.Logf("Waiting for guest client to become available") + guestClient := e2eutil.WaitForGuestClient(t, testContext, client, hostedCluster) + + // Wait for Nodes to be Ready + numNodes := int32(globalOpts.configurableClusterOptions.NodePoolReplicas * len(clusterOpts.AWSPlatform.Zones)) + e2eutil.WaitForNReadyNodes(t, testContext, guestClient, numNodes, hostedCluster.Spec.Platform.Type) + + // Wait for the rollout to be complete + t.Logf("Waiting for cluster rollout. Image: %s", globalOpts.LatestReleaseImage) + e2eutil.WaitForImageRollout(t, testContext, client, guestClient, hostedCluster, globalOpts.LatestReleaseImage) + err = client.Get(testContext, crclient.ObjectKeyFromObject(hostedCluster), hostedCluster) + g.Expect(err).NotTo(HaveOccurred(), "failed to get hostedcluster") + + tunedConfigConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hugepages-tuned-test", + Namespace: hostedCluster.Namespace, + }, + Data: map[string]string{tunedConfigKey: hugepagesTuned}, + } + if err := client.Create(ctx, tunedConfigConfigMap); err != nil { + t.Fatalf("failed to create configmap for custom Tuned object: %v", err) + } + + nodePools := &hyperv1.NodePoolList{} + if err := client.List(ctx, nodePools, crclient.InNamespace(hostedCluster.Namespace)); err != nil { + t.Fatalf("failed to list nodepools in namespace %s: %v", hostedCluster.Namespace, err) + } + + var nodePool hyperv1.NodePool + for _, nodePool = range nodePools.Items { + if nodePool.Spec.ClusterName != hostedCluster.Name { + continue + } + + np := nodePool.DeepCopy() + nodePool.Spec.TunedConfig = append(nodePool.Spec.TunedConfig, corev1.LocalObjectReference{Name: tunedConfigConfigMap.Name}) + if err := client.Patch(ctx, &nodePool, crclient.MergeFrom(np)); err != nil { + t.Fatalf("failed to update nodepool %s after adding Tuned config: %v", nodePool.Name, err) + } + } + + ds := ntoMachineConfigUpdatedVerificationDS.DeepCopy() + if err := guestClient.Create(ctx, ds); err != nil { + t.Fatalf("failed to create %s DaemonSet in guestcluster: %v", ds.Name, err) + } + + t.Logf("waiting for NodePools in-place update with NTO-generated MachineConfig") + err = wait.PollImmediateWithContext(ctx, 5*time.Second, 15*time.Minute, func(ctx context.Context) (bool, error) { + if ctx.Err() != nil { + return false, err + } + pods := &corev1.PodList{} + if err := guestClient.List(ctx, pods, crclient.InNamespace(ds.Namespace), crclient.MatchingLabels(ds.Spec.Selector.MatchLabels)); err != nil { + t.Logf("WARNING: failed to list pods, will retry: %v", err) + return false, nil + } + nodes := &corev1.NodeList{} + if err := guestClient.List(ctx, nodes); err != nil { + t.Logf("WARNING: failed to list nodes, will retry: %v", err) + return false, nil + } + if len(pods.Items) != len(nodes.Items) { + return false, nil + } + + for _, pod := range pods.Items { + if !isPodReady(&pod) { + return false, nil + } + } + + return true, nil + }) + if err != nil { + t.Fatalf("failed waiting for all pods in the NTO MachineConfig update verification DS to be ready: %v", err) + } + + e2eutil.EnsureNodeCountMatchesNodePoolReplicas(t, testContext, client, guestClient, hostedCluster.Namespace) + e2eutil.EnsureNoCrashingPods(t, ctx, client, hostedCluster) + 
e2eutil.EnsureAllContainersHavePullPolicyIfNotPresent(t, ctx, client, hostedCluster) + e2eutil.EnsureHCPContainersHaveResourceRequests(t, ctx, client, hostedCluster) + e2eutil.EnsureNoPodsWithTooHighPriority(t, ctx, client, hostedCluster) +} + +//go:embed nodepool_nto_machineconfig_verification_ds.yaml +var ntoMachineConfigUpdatedVerificationDSRaw []byte + +var ntoMachineConfigUpdatedVerificationDS = func() *appsv1.DaemonSet { + ds := &appsv1.DaemonSet{} + if err := yaml.Unmarshal(ntoMachineConfigUpdatedVerificationDSRaw, &ds); err != nil { + panic(err) + } + return ds +}() diff --git a/test/e2e/nodepool_nto_machineconfig_verification_ds.yaml b/test/e2e/nodepool_nto_machineconfig_verification_ds.yaml new file mode 100644 index 00000000000..e7ff1d6b6f3 --- /dev/null +++ b/test/e2e/nodepool_nto_machineconfig_verification_ds.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nto-machineconfig-update-checker + namespace: kube-system +spec: + selector: + matchLabels: + name: nto-machineconfig-update-checker + template: + metadata: + labels: + name: nto-machineconfig-update-checker + spec: + tolerations: + - operator: Exists + containers: + - name: nto-machineconfig-update-checker + image: alpine + command: + - /bin/sleep + - 24h + resources: + requests: + cpu: 100m + memory: 200Mi + readinessProbe: + exec: + command: + - /bin/sh + - -c + - 'cat /proc/cmdline | grep "hugepagesz=2M hugepages=4"' + volumeMounts: + - name: host + mountPath: /host + readOnly: true + terminationGracePeriodSeconds: 30 + volumes: + - name: host + hostPath: + path: /
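+# Note: the readiness probe above only reports Ready once the host kernel command line contains
+# the hugepages arguments rendered from the test's TuneD profile (hugepagesz=2M hugepages=4), so
+# an all-Ready DaemonSet is what the e2e tests use as the signal that the NTO-generated
+# MachineConfig has been rolled out to every Node.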