Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions cmd/nodepool/core/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ import (
"github.com/openshift/hypershift/cmd/log"
"github.com/openshift/hypershift/cmd/util"
hyperapi "github.com/openshift/hypershift/support/api"
"github.com/openshift/hypershift/support/releaseinfo"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

ctrl "sigs.k8s.io/controller-runtime"
crclient "sigs.k8s.io/controller-runtime/pkg/client"

"github.com/blang/semver"
"github.com/spf13/cobra"
)

Expand Down Expand Up @@ -54,6 +58,10 @@ func (o *CreateNodePoolOptions) Validate(ctx context.Context, c crclient.Client)
return err
}

if err := validMinorVersionCompatibility(ctx, c, o.ClusterName, o.Namespace, o.ReleaseImage, &releaseinfo.RegistryClientProvider{}); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -184,3 +192,95 @@ func validateHostedClusterPayloadSupportsNodePoolCPUArch(ctx context.Context, cl

return nil
}

// validMinorVersionCompatibility validates that the NodePool version is compatible with the HostedCluster version.
// For 4.even versions, it allows y-2 difference.
// For 4.odd versions, it allows y-1 difference.
// NodePool version cannot be higher than control plane version.
func validMinorVersionCompatibility(ctx context.Context, client crclient.Client, name, namespace, nodePoolReleaseImage string, releaseProvider releaseinfo.Provider) error {
if nodePoolReleaseImage == "" {
return nil
}
logger := ctrl.LoggerFrom(ctx)

hcluster := &hyperv1.HostedCluster{}
if err := client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, hcluster); err != nil {
if !apierrors.IsNotFound(err) {
// For other errors (e.g. API server issues, RBAC problems), we should return the error
return fmt.Errorf("failed to get HostedCluster to check version compatibility: %w", err)
}

// This is expected to happen when we create a cluster since there is no created HostedCluster CR to check the
// payload from.
logger.Info("WARNING: failed to get HostedCluster to check version compatibility")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but do you have to allow NodePool creation to complete before there's a HostedCluster? You could decide to block until the HostedCluster exists and have the create caller retry after that point. Or you could loop some number of times or for some duration here on the does-not-exist failure mode, waiting for the HostedCluster to start existing in parallel.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When creating a HostedCluster using the HyperShift CLI, a default NodePool is also created. This is just CLI test code—we also add validation on the controller side.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like that controller-side validation is future-plans, not current-implementation. But I'm fine assuming that there will be some controller-side backstops, and that this client code is just best-effort attempts to warn command-line users without bothering to go as far and slow as an API NodePool creation attempt, and letting these through here with the warning 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#5931 is following up with the HyperShift-operator side validation discussed in this thread.

return nil
}

// Get the control plane version string
var controlPlaneVersionStr string
if len(hcluster.Status.Version.History) == 0 {
// If the cluster is in the process of installation, there is no history
// Use the desired version as the control plane version
controlPlaneVersionStr = hcluster.Status.Version.Desired.Version
} else {
// If the cluster is installed or upgrading
// Start with the most recent version from history as the default
controlPlaneVersionStr = hcluster.Status.Version.History[len(hcluster.Status.Version.History)-1].Version
// Update with any more recent Completed version if found
for _, history := range hcluster.Status.Version.History {
if history.State == "Completed" {
controlPlaneVersionStr = history.Version
break
}
}
}

// Parse control plane version
controlPlaneVersion, err := semver.Parse(controlPlaneVersionStr)
if err != nil {
return fmt.Errorf("parsing control plane version (%s): %w", controlPlaneVersionStr, err)
}

pullSecret := &corev1.Secret{}
if err = client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: hcluster.Spec.PullSecret.Name}, pullSecret); err != nil {
return fmt.Errorf("failed to get pull secret: %w", err)
}

releaseImage, err := releaseProvider.Lookup(ctx, nodePoolReleaseImage, pullSecret.Data[corev1.DockerConfigJsonKey])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this result in the unit tests actually pulling images? the func should receive the release info provider interface and let the units use a fake implementation, we have some precedents for this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! done

if err != nil {
// Skip version check in disconnected environment where registry access is not available
logger.Info("WARNING: Unable to access the payload, skipping the Minor Version check.", "error", err.Error())
return nil
}

// Parse NodePool version
nodePoolVersion, err := semver.Parse(releaseImage.Version())
if err != nil {
return fmt.Errorf("parsing NodePool version (%s): %w", releaseImage.Version(), err)
}

// NodePool version cannot be higher than control plane version
if nodePoolVersion.GT(controlPlaneVersion) {
return fmt.Errorf("NodePool version %s cannot be higher than the HostedCluster version %s",
nodePoolVersion, controlPlaneVersion)
}

// Calculate minor version difference
versionDiff := int64(controlPlaneVersion.Minor - nodePoolVersion.Minor)

// For 4.even versions, allow y-2 difference
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is the even/odd distinction coming from?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://issues.redhat.com/browse/OCPSTRAT-1707

Requirements (aka. Acceptance Criteria):

  • The HCP documentation accurately reflects the supported NodePool minor version skew of y-2 and removes the floating reference to upstream kubernetes.
  • HCP prevents the creation of NodePools with a skew greater than y-2 (for 4.even control planes) or y-1 (for 4.odd control planes) through code guardrails. Example of how[ its done in standalone OpenShift|https://github.com/Bug 1998552: Enforce OpenShift's defined kubelet version skew policies cluster-kube-apiserver-operator#1199#issue-958481467].
  • Existing NodePools with a skew greater than y-2 are not impacted (but upgrading the control plane may be blocked).
  • Appropriate error messages are provided to users when attempting to create NodePools with an unsupported skew.

// For 4.odd versions, allow y-1 difference
maxAllowedDiff := int64(2)
if controlPlaneVersion.Minor%2 == 1 {
maxAllowedDiff = 1
}

if versionDiff > maxAllowedDiff {
return fmt.Errorf("NodePool minor version %d.%d is not compatible with the HostedCluster minor version %d.%d (max allowed difference: %d)",
nodePoolVersion.Major, nodePoolVersion.Minor,
controlPlaneVersion.Major, controlPlaneVersion.Minor,
maxAllowedDiff)
}

return nil
}
129 changes: 129 additions & 0 deletions cmd/nodepool/core/create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ import (

hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1"
"github.com/openshift/hypershift/support/api"
fakereleaseprovider "github.com/openshift/hypershift/support/releaseinfo/fake"

configv1 "github.com/openshift/api/config/v1"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -102,3 +106,128 @@ func TestValidateHostedClusterPayloadSupportsNodePoolCPUArch(t *testing.T) {
})
}
}

func TestValidMinorVersionCompatibility(t *testing.T) {
// Define base HostedCluster structure
baseHC := &hyperv1.HostedCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "test-cluster",
Namespace: "test-namespace",
},
Spec: hyperv1.HostedClusterSpec{
PullSecret: corev1.LocalObjectReference{
Name: "pull-secret",
},
},
Status: hyperv1.HostedClusterStatus{
Version: &hyperv1.ClusterVersionStatus{
History: []configv1.UpdateHistory{},
},
},
}

basePullSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "pull-secret",
Namespace: "test-namespace",
},
Data: map[string][]byte{
corev1.DockerConfigJsonKey: []byte(`{"auths":{"quay.io":{"auth":"","email":""}}}`),
},
}

tests := []struct {
name string
controlPlaneVersion string
nodePoolReleaseImage string
nodePoolVersion string
expectedError string
}{
{
name: "when nodePool version matches control plane version it should not return error",
controlPlaneVersion: "4.18.5",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.18.5-x86_64",
nodePoolVersion: "4.18.5",
expectedError: "",
},
{
name: "when nodePool version is higher than control plane version it should return error",
controlPlaneVersion: "4.17.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.18.5-x86_64",
nodePoolVersion: "4.18.5",
expectedError: "NodePool version 4.18.5 cannot be higher than the HostedCluster version 4.17.0",
},
{
name: "when nodePool version is one minor version lower than control plane (odd version) it should not return error",
controlPlaneVersion: "4.17.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.16.37-x86_64",
nodePoolVersion: "4.16.37",
expectedError: "",
},
{
name: "when nodePool version is two minor versions lower than control plane (odd version) it should return error",
controlPlaneVersion: "4.17.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.15.47-x86_64",
nodePoolVersion: "4.15.47",
expectedError: "NodePool minor version 4.15 is not compatible with the HostedCluster minor version 4.17 (max allowed difference: 1)",
},
{
name: "when nodePool version is two minor versions lower than control plane (even version) it should not return error",
controlPlaneVersion: "4.18.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.16.0-x86_64",
nodePoolVersion: "4.16.0",
expectedError: "",
},
{
name: "when nodePool version is three minor versions lower than control plane (even version) it should return error",
controlPlaneVersion: "4.18.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:4.15.0-x86_64",
nodePoolVersion: "4.15.0",
expectedError: "NodePool minor version 4.15 is not compatible with the HostedCluster minor version 4.18 (max allowed difference: 2)",
},
{
name: "when nodePool major version is higher than control plane version it should return error",
controlPlaneVersion: "4.18.0",
nodePoolReleaseImage: "quay.io/openshift-release-dev/ocp-release:5.0.0-x86_64",
nodePoolVersion: "5.0.0",
expectedError: "NodePool version 5.0.0 cannot be higher than the HostedCluster version 4.18.0",
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
g := NewWithT(t)
t.Logf("Running test case: %s", test.name)

// Create a copy of the base HostedCluster and modify only the version
hc := baseHC.DeepCopy()
hc.Status.Version.History = []configv1.UpdateHistory{
{
State: configv1.CompletedUpdate,
Version: test.controlPlaneVersion,
},
}

// Create the resources in the fake client
objs := []client.Object{hc, basePullSecret}
c := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects(objs...).Build()

releaseProvider := &fakereleaseprovider.FakeReleaseProvider{
Version: test.nodePoolVersion,
}

// Run the test
err := validMinorVersionCompatibility(context.TODO(), c, "test-cluster", "test-namespace", test.nodePoolReleaseImage, releaseProvider)

// Check the results
if test.expectedError == "" {
g.Expect(err).NotTo(HaveOccurred())
t.Log("Test passed as expected with no error")
} else {
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(Equal(test.expectedError))
t.Logf("Test passed as expected with error: %s", test.expectedError)
}
})
}
}