From beca62f4766b95ac0d522f4486c388770d543f71 Mon Sep 17 00:00:00 2001 From: Praveen Kumar Date: Mon, 22 Jul 2024 21:13:41 +0530 Subject: [PATCH] start: Remove workaround that deletes the apiserver pods when aggregator-client-ca is regenerated This workaround was required during the OCP 4.3 timeframe. The underlying bug has since been fixed, so the workaround can be removed as well. - https://bugzilla.redhat.com/show_bug.cgi?id=1795163 --- pkg/crc/cluster/cluster.go | 31 ------------------------------- pkg/crc/machine/start.go | 23 ----------------------- 2 files changed, 54 deletions(-) diff --git a/pkg/crc/cluster/cluster.go b/pkg/crc/cluster/cluster.go index ad2201358a..0b6f764a0d 100644 --- a/pkg/crc/cluster/cluster.go +++ b/pkg/crc/cluster/cluster.go @@ -432,20 +432,6 @@ func WaitForPullSecretPresentOnInstanceDisk(ctx context.Context, sshRunner *ssh. return errors.Retry(ctx, 7*time.Minute, pullSecretPresentFunc, 2*time.Second) } -func WaitForRequestHeaderClientCaFile(ctx context.Context, sshRunner *ssh.Runner) error { - lookupRequestHeaderClientCa := func() error { - expired, err := checkCertValidity(sshRunner, AggregatorClientCert) - if err != nil { - return fmt.Errorf("Failed to the expiry date: %v", err) - } - if expired { - return &errors.RetriableError{Err: fmt.Errorf("certificate still expired")} - } - return nil - } - return errors.Retry(ctx, 8*time.Minute, lookupRequestHeaderClientCa, 2*time.Second) -} - func WaitForAPIServer(ctx context.Context, ocConfig oc.Config) error { logging.Info("Waiting for kube-apiserver availability... 
[takes around 2min]") waitForAPIServer := func() error { @@ -460,23 +446,6 @@ func WaitForAPIServer(ctx context.Context, ocConfig oc.Config) error { return errors.Retry(ctx, 4*time.Minute, waitForAPIServer, time.Second) } -func DeleteOpenshiftAPIServerPods(ctx context.Context, ocConfig oc.Config) error { - if err := WaitForOpenshiftResource(ctx, ocConfig, "pod"); err != nil { - return err - } - - deleteOpenshiftAPIServerPods := func() error { - cmdArgs := []string{"delete", "pod", "--all", "--force", "-n", "openshift-apiserver"} - _, stderr, err := ocConfig.WithFailFast().RunOcCommand(cmdArgs...) - if err != nil { - return &errors.RetriableError{Err: fmt.Errorf("Failed to delete pod from openshift-apiserver namespace %v: %s", err, stderr)} - } - return nil - } - - return errors.Retry(ctx, 60*time.Second, deleteOpenshiftAPIServerPods, time.Second) -} - func CheckProxySettingsForOperator(ocConfig oc.Config, proxy *httpproxy.ProxyConfig, deployment, namespace string) (bool, error) { if !proxy.IsEnabled() { logging.Debugf("No proxy in use") diff --git a/pkg/crc/machine/start.go b/pkg/crc/machine/start.go index 587c07d8ea..8d0295d081 100644 --- a/pkg/crc/machine/start.go +++ b/pkg/crc/machine/start.go @@ -602,29 +602,6 @@ func (client *client) Start(ctx context.Context, startConfig types.StartConfig) } } - // In Openshift 4.3, when cluster comes up, the following happens - // 1. After the openshift-apiserver pod is started, its log contains multiple occurrences of `certificate has expired or is not yet valid` - // 2. Initially there is no request-header's client-ca crt available to `extension-apiserver-authentication` configmap - // 3. In the pod logs `missing content for CA bundle "client-ca::kube-system::extension-apiserver-authentication::requestheader-client-ca-file"` - // 4. After ~1 min /etc/kubernetes/static-pod-resources/kube-apiserver-certs/configmaps/aggregator-client-ca/ca-bundle.crt is regenerated - // 5. 
It is now also appear to `extension-apiserver-authentication` configmap as part of request-header's client-ca content - // 6. Openshift-apiserver is able to load the CA which was regenerated - // 7. Now apiserver pod log contains multiple occurrences of `error x509: certificate signed by unknown authority` - // When the openshift-apiserver is in this state, the cluster is non functional. - // A restart of the openshift-apiserver pod is enough to clear that error and get a working cluster. - // This is a work-around while the root cause is being identified. - // More info: https://bugzilla.redhat.com/show_bug.cgi?id=1795163 - if certsExpired[cluster.AggregatorClientCert] { - logging.Debug("Waiting for the renewal of the request header client ca...") - if err := cluster.WaitForRequestHeaderClientCaFile(ctx, sshRunner); err != nil { - return nil, errors.Wrap(err, "Failed to wait for aggregator client ca renewal") - } - - if err := cluster.DeleteOpenshiftAPIServerPods(ctx, ocConfig); err != nil { - return nil, errors.Wrap(err, "Cannot delete OpenShift API Server pods") - } - } - if err := updateKubeconfig(ctx, ocConfig, sshRunner, vm.bundle.GetKubeConfigPath()); err != nil { return nil, errors.Wrap(err, "Failed to update kubeconfig file") }