@@ -39,6 +39,10 @@ const (
var (
volumeMounts = util.PodVolumeMounts{
kasContainerBootstrap().Name: {
kasVolumeBootstrapManifests().Name: "/work",
kasVolumeLocalhostKubeconfig().Name: "/var/secrets/localhost-kubeconfig",
},
kasContainerBootstrapRender().Name: {
kasVolumeBootstrapManifests().Name: "/work",
},
kasContainerApplyBootstrap().Name: {
@@ -200,9 +204,11 @@ func ReconcileKubeAPIServerDeployment(deployment *appsv1.Deployment,
SchedulerName: corev1.DefaultSchedulerName,
AutomountServiceAccountToken: ptr.To(false),
InitContainers: []corev1.Container{
util.BuildContainer(kasContainerBootstrap(), buildKASContainerBootstrap(images.ClusterConfigOperator, payloadVersion, featureGateYaml)),
util.BuildContainer(kasContainerBootstrapRender(), buildKASContainerBootstrapRender(images.ClusterConfigOperator, payloadVersion, featureGateYaml)),
},
Containers: []corev1.Container{
// TODO(alberto): Move the logic from kasContainerApplyBootstrap to kasContainerBootstrap and drop the former.
util.BuildContainer(kasContainerBootstrap(), buildKASContainerNewBootstrap(images.KASBootstrap)),
util.BuildContainer(kasContainerApplyBootstrap(), buildKASContainerApplyBootstrap(images.CLI)),
util.BuildContainer(kasContainerMain(), buildKASContainerMain(images.HyperKube, port, additionalNoProxyCIDRS, hcp)),
util.BuildContainer(konnectivityServerContainer(), buildKonnectivityServerContainer(images.KonnectivityServer, deploymentConfig.Replicas, cipherSuites)),
@@ -335,11 +341,41 @@ func ReconcileKubeAPIServerDeployment(deployment *appsv1.Deployment,

func kasContainerBootstrap() *corev1.Container {
return &corev1.Container{
Name: "init-bootstrap",
Name: "bootstrap",
}
}
func buildKASContainerNewBootstrap(image string) func(c *corev1.Container) {
return func(c *corev1.Container) {
c.Image = image
c.TerminationMessagePolicy = corev1.TerminationMessageReadFile
c.TerminationMessagePath = corev1.TerminationMessagePathDefault
c.ImagePullPolicy = corev1.PullIfNotPresent
c.Command = []string{
"/usr/bin/control-plane-operator",
"kas-bootstrap",
"--rendered-featuregate-path", volumeMounts.Path(c.Name, kasVolumeBootstrapManifests().Name),
}
c.Resources.Requests = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10m"),
corev1.ResourceMemory: resource.MustParse("10Mi"),
}
c.Env = []corev1.EnvVar{
{
Name: "KUBECONFIG",
Value: path.Join(volumeMounts.Path(kasContainerBootstrap().Name, kasVolumeLocalhostKubeconfig().Name), KubeconfigKey),
},
}
c.VolumeMounts = volumeMounts.ContainerMounts(c.Name)
}
}

func buildKASContainerBootstrap(image, payloadVersion, featureGateYaml string) func(c *corev1.Container) {
func kasContainerBootstrapRender() *corev1.Container {
return &corev1.Container{
Name: "bootstrap-render",
}
}

func buildKASContainerBootstrapRender(image, payloadVersion, featureGateYaml string) func(c *corev1.Container) {
return func(c *corev1.Container) {
c.Command = []string{
"/bin/bash",
@@ -349,7 +385,7 @@ func buildKASContainerBootstrap(image, payloadVersion, featureGateYaml string) f
c.TerminationMessagePath = corev1.TerminationMessagePathDefault
c.Args = []string{
"-c",
invokeBootstrapRenderScript(volumeMounts.Path(kasContainerBootstrap().Name, kasVolumeBootstrapManifests().Name), payloadVersion, featureGateYaml),
invokeBootstrapRenderScript(volumeMounts.Path(kasContainerBootstrapRender().Name, kasVolumeBootstrapManifests().Name), payloadVersion, featureGateYaml),
}
c.Resources.Requests = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10m"),
@@ -812,13 +848,6 @@ while true; do
fi
sleep 1
done
while true; do
if oc replace --subresource=status -f %[1]s/99_feature-gate.yaml; then
echo "FeatureGate status applied successfully."
break
fi
sleep 1
done
while true; do
sleep 1000 &
wait $!
@@ -33,6 +33,7 @@ type KubeAPIServerImages struct {
TokenMinterImage string
AWSPodIdentityWebhookImage string
KonnectivityServer string
KASBootstrap string
}

type KubeAPIServerParams struct {
@@ -116,6 +117,7 @@ func NewKubeAPIServerParams(ctx context.Context, hcp *hyperv1.HostedControlPlane
AzureKMS: releaseImageProvider.GetImage("azure-kms-encryption-provider"),
AWSPodIdentityWebhookImage: releaseImageProvider.GetImage("aws-pod-identity-webhook"),
KonnectivityServer: releaseImageProvider.GetImage("apiserver-network-proxy"),
KASBootstrap: releaseImageProvider.GetImage(util.CPOImageName),
},
MaxRequestsInflight: fmt.Sprint(defaultMaxRequestsInflight),
MaxMutatingRequestsInflight: fmt.Sprint(defaultMaxMutatingRequestsInflight),
6 changes: 5 additions & 1 deletion control-plane-operator/main.go
@@ -20,6 +20,7 @@ import (
etcdbackup "github.com/openshift/hypershift/etcd-backup"
etcddefrag "github.com/openshift/hypershift/etcd-defrag"
ignitionserver "github.com/openshift/hypershift/ignition-server/cmd"
kasbootstrap "github.com/openshift/hypershift/kas-bootstrap"
konnectivityhttpsproxy "github.com/openshift/hypershift/konnectivity-https-proxy"
konnectivitysocks5proxy "github.com/openshift/hypershift/konnectivity-socks5-proxy"
kubernetesdefaultproxy "github.com/openshift/hypershift/kubernetes-default-proxy"
@@ -78,6 +79,8 @@ func main() {
func commandFor(name string) *cobra.Command {
var cmd *cobra.Command
switch name {
case "kas-bootstrap":
cmd = kasbootstrap.NewRunCommand()
case "ignition-server":
cmd = ignitionserver.NewStartCommand()
case "konnectivity-socks5-proxy":
@@ -140,7 +143,7 @@ func defaultCommand() *cobra.Command {
cmd.AddCommand(kubernetesdefaultproxy.NewStartCommand())
cmd.AddCommand(dnsresolver.NewCommand())
cmd.AddCommand(etcdbackup.NewStartCommand())

cmd.AddCommand(kasbootstrap.NewRunCommand())
return cmd

}
@@ -358,6 +361,7 @@ func NewStartCommand() *cobra.Command {
}
setupLog.Info("using token minter image", "image", tokenMinterImage)

cpoImage = os.Getenv("CONTROL_PLANE_OPERATOR_IMAGE")
cpoImage, err = lookupOperatorImage(cpoImage)
if err != nil {
setupLog.Error(err, "failed to find controlplane-operator-image")
142 changes: 142 additions & 0 deletions kas-bootstrap/kas_boostrap.go
@@ -0,0 +1,142 @@
package kasbootstrap

import (
"context"
"fmt"
"os"
"path/filepath"

configv1 "github.com/openshift/api/config/v1"

equality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"

ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

"go.uber.org/zap/zapcore"
)

func init() {
utilruntime.Must(configv1.Install(configScheme))
}

var (
configScheme = runtime.NewScheme()
configCodecs = serializer.NewCodecFactory(configScheme)
)

func run(ctx context.Context, opts Options) error {
logger := zap.New(zap.JSONEncoder(func(o *zapcore.EncoderConfig) {
o.EncodeTime = zapcore.RFC3339TimeEncoder
}))
ctrl.SetLogger(logger)

cfg, err := ctrl.GetConfig()
if err != nil {
return fmt.Errorf("failed to get config: %w", err)
}
c, err := client.New(cfg, client.Options{Scheme: configScheme})
if err != nil {
return fmt.Errorf("failed to create client: %w", err)
}

content, err := os.ReadFile(filepath.Join(opts.RenderedFeatureGatePath, "99_feature-gate.yaml"))
if err != nil {
return fmt.Errorf("failed to read featureGate file: %w", err)
}

renderedFeatureGate, err := parseFeatureGateV1(content)
if err != nil {
return fmt.Errorf("failed to parse featureGate file: %w", err)
}

if err := reconcileFeatureGate(ctx, c, renderedFeatureGate); err != nil {
return fmt.Errorf("failed to reconcile featureGate: %w", err)
}

// we want to keep the process running during the lifecycle of the Pod because the Pod runs with restartPolicy=Always
// and it's not possible for individual containers to have a dedicated restartPolicy like OnFailure.

// start a goroutine that will close the done channel when the context is done.
done := make(chan struct{})

Contributor:
can you explain why we want to keep the process running?

Member Author:
Because the pod RestartPolicy is Always, keeping the process running mimics current behaviour, and I would defer deviating from it to a different change.

Contributor:
ack, any reason we are not adding this as an init container instead?

@wking (Member), Mar 20, 2025:
I'd guess it can't be an init container because it needs to talk to the actual API server running in another long-lived container in this same Pod. But couldn't we set a container-scoped restartPolicy: OnFailure for this container to get both:

  • The ability to exit 0 when we were successfully reconciled and recover the resources the container process had been consuming. Until some future when management of these resources moves from "successfully reconciled once per 4.y.z release" to "actively watched and managed with some regularity", which would be nice, but is likely more than we want to bite off in a single pull request.
  • Reporting via KubePodCrashLooping if the container has trouble, while the container continues to relaunch and retry. Not as direct as having the controlling CPO know why the container was having trouble, but at least there would be a sign of trouble visible in Kube at a higher level than "dip into the container's logs".

Contributor:
@wking this last comment led me to do a little bit of experimentation on a 4.19 ci cluster :)

  • Tried changing the restart policy of a side container under .spec.containers, and failed admission with:
    * spec.template.spec.containers[1].restartPolicy: Forbidden: may not be set for non-init containers

  • Then tried moving the container under .spec.initContainers with a restartPolicy of OnFailure, and also failed admission with:
    * spec.template.spec.initContainers[0].restartPolicy: Unsupported value: "OnFailure": supported values: "Always"

  • Then tried changing the initContainer restartPolicy to Always, and the deployment was accepted. The init container ended up running as a side container (which was new to me :)). I could not see a difference, though, between the additional container under .spec.containers and the init container with restartPolicy set to Always.

Bottom line, I think what we have here is fine.

Member Author:
Yes, the reason I didn't just set restart-on-failure for this container is that, afaik, individual containers can't supersede the Pod restartPolicy. Moving it to init as a side container would technically differ operationally from what we do at the moment and could cause more restarts while racing rendering, for no value, so I'd rather keep it as it is, keep changes scoped, and defer any further change to different PRs. After this one we'll still need to move the apply logic to this binary.
I added a comment in the code to clarify the restart policies.
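
For context, here is a minimal, hypothetical sketch of the native-sidecar variant discussed in this thread: an init container carrying the container-level restartPolicy introduced in Kubernetes 1.28, which admission only accepts with the value Always. The helper name and wiring below are illustrative and not part of this PR.

package example

import (
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
)

// addBootstrapSidecar is a hypothetical helper showing the native-sidecar variant:
// an init container whose container-level restartPolicy is Always (the only value
// admission accepts for init containers). Kubernetes keeps restarting such a
// container for the lifetime of the Pod, so the exit-0-on-success semantics of
// OnFailure are still not available this way.
func addBootstrapSidecar(deployment *appsv1.Deployment, image string) {
	restartAlways := corev1.ContainerRestartPolicyAlways
	deployment.Spec.Template.Spec.InitContainers = append(
		deployment.Spec.Template.Spec.InitContainers,
		corev1.Container{
			Name:          "bootstrap",
			Image:         image,
			RestartPolicy: &restartAlways, // "OnFailure" is rejected by admission here
			Command:       []string{"/usr/bin/control-plane-operator", "kas-bootstrap"},
		},
	)
}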

go func() {
<-ctx.Done()
close(done)
}()

logger.Info("kas-bootstrap process completed successfully, waiting for termination signal")
<-done

return nil
}

// reconcileFeatureGate reconciles the featureGate CR status, appending the renderedFeatureGate status.featureGates to the existing featureGates.
// It will not fail if the clusterVersion is not found, as this is expected for a brand new cluster.
// But if the clusterVersion exists, it will drop any featureGates entries for versions that are not in clusterVersion.Status.History.
func reconcileFeatureGate(ctx context.Context, c client.Client, renderedFeatureGate *configv1.FeatureGate) error {
logger := ctrl.LoggerFrom(ctx).WithName("kas-bootstrap")

knownVersions := sets.NewString()
var clusterVersion configv1.ClusterVersion
err := c.Get(ctx, client.ObjectKey{Name: "version"}, &clusterVersion)
if err != nil {
// we don't fail if we can't get the clusterVersion; we just skip filtering the existing featureGates by known versions.
// This is always the case for a brand new cluster, as the clusterVersion is not created yet.
logger.Info("WARNING: failed to get clusterVersion. This is expected for a brand new cluster", "error", err)
} else {
knownVersions = sets.NewString(clusterVersion.Status.Desired.Version)
for _, cvoVersion := range clusterVersion.Status.History {
knownVersions.Insert(cvoVersion.Version)

Member:
standalone OCP currently doesn't do any garbage collection, so what you have now is fine as it stands. But once you hit your first Completed entry and insert that into knownVersions, you can break, because there shouldn't be anything left on the cluster that cares about those ancient releases anymore:

knownVersions = sets.NewString(clusterVersion.Status.Desired.Version)
for _, cvoVersion := range clusterVersion.Status.History {
	knownVersions.Insert(cvoVersion.Version)
	if cvoVersion.State == configv1.CompletedUpdate {
		break
	}
}

Member Author:
Thanks, updated the logic and unit coverage to reflect this


// Once we hit the first Completed entry and insert that into knownVersions,
// we can break, because there shouldn't be anything left on the cluster that cares about those ancient releases anymore.
if cvoVersion.State == configv1.CompletedUpdate {
break
}
}
}

var featureGate configv1.FeatureGate
if err := c.Get(ctx, client.ObjectKey{Name: "cluster"}, &featureGate); err != nil {
return fmt.Errorf("failed to get featureGate: %w", err)
}

desiredFeatureGates := renderedFeatureGate.Status.FeatureGates
currentVersion := renderedFeatureGate.Status.FeatureGates[0].Version
for i := range featureGate.Status.FeatureGates {
featureGateValues := featureGate.Status.FeatureGates[i]
if featureGateValues.Version == currentVersion {
continue
}
if len(knownVersions) > 0 && !knownVersions.Has(featureGateValues.Version) {
continue
}
desiredFeatureGates = append(desiredFeatureGates, featureGateValues)
}

if equality.Semantic.DeepEqual(desiredFeatureGates, featureGate.Status.FeatureGates) {
logger.Info("There is no update for featureGate.Status.FeatureGates")
return nil
}

original := featureGate.DeepCopy()
featureGate.Status.FeatureGates = desiredFeatureGates
if err := c.Status().Patch(ctx, &featureGate, client.MergeFromWithOptions(original, client.MergeFromWithOptimisticLock{})); err != nil {
return fmt.Errorf("failed to update featureGate: %w", err)
}
return nil
}

func parseFeatureGateV1(objBytes []byte) (*configv1.FeatureGate, error) {
requiredObj, err := runtime.Decode(configCodecs.UniversalDecoder(configv1.SchemeGroupVersion), objBytes)
if err != nil {
return nil, fmt.Errorf("failed to decode featureGate: %w", err)
}

return requiredObj.(*configv1.FeatureGate), nil
}