diff --git a/internal/controller/kyma/controller.go b/internal/controller/kyma/controller.go
index 6715f347d7..6d94312441 100644
--- a/internal/controller/kyma/controller.go
+++ b/internal/controller/kyma/controller.go
@@ -425,14 +425,19 @@ func (r *Reconciler) handleProcessingState(ctx context.Context, kyma *v1beta2.Ky
 		errGroup.Go(func() error {
 			if err := r.SKRWebhookManager.Reconcile(ctx, kyma); err != nil {
 				r.Metrics.RecordRequeueReason(metrics.SkrWebhookResourcesInstallation, queue.UnexpectedRequeue)
+				kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionFalse)
 				if errors.Is(err, watcher.ErrSkrCertificateNotReady) {
-					kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionFalse)
 					return nil
 				}
 				return err
 			}
-			kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionTrue)
-			return nil
+			skrClient, err := r.SkrContextFactory.Get(client.ObjectKeyFromObject(kyma))
+			if err != nil {
+				// Without an SKR client the webhook deployment cannot be inspected.
+				kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionFalse)
+				return err
+			}
+			return checkSKRWebhookReadiness(ctx, skrClient, kyma)
 		})
 	}
 
@@ -454,6 +459,23 @@ func (r *Reconciler) handleProcessingState(ctx context.Context, kyma *v1beta2.Ky
 		r.updateStatus(ctx, kyma, state, "waiting for all modules to become ready")
 }
 
+// checkSKRWebhookReadiness sets the SKRWebhook condition on kyma from the result of
+// watcher.AssertDeploymentReady. Only an error wrapping watcher.ErrSkrWebhookDeploymentInBackoff
+// is returned to the caller; any other failure just leaves the condition set to False.
+func checkSKRWebhookReadiness(ctx context.Context, skrClient *remote.SkrContext, kyma *v1beta2.Kyma) error {
+	err := watcher.AssertDeploymentReady(ctx, skrClient)
+	if err == nil {
+		kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionTrue)
+		return nil
+	}
+
+	kyma.UpdateCondition(v1beta2.ConditionTypeSKRWebhook, apimetav1.ConditionFalse)
+	if errors.Is(err, watcher.ErrSkrWebhookDeploymentInBackoff) {
+		return err
+	}
+	return nil
+}
+
 func (r *Reconciler) handleDeletingState(ctx context.Context, kyma *v1beta2.Kyma) (ctrl.Result, error) {
 	logger := logf.FromContext(ctx).V(log.InfoLevel)
 
diff --git a/pkg/watcher/skr_webhook_manifest_manager.go b/pkg/watcher/skr_webhook_manifest_manager.go
index 500d7fbbab..3f664657c7 100644
--- a/pkg/watcher/skr_webhook_manifest_manager.go
+++ b/pkg/watcher/skr_webhook_manifest_manager.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 
 	"github.com/go-logr/logr"
+	apiappsv1 "k8s.io/api/apps/v1"
 	apicorev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -24,10 +25,15 @@ import (
 	skrwebhookresources "github.com/kyma-project/lifecycle-manager/pkg/watcher/skr_webhook_resources"
 )
 
-var ErrSkrCertificateNotReady = errors.New("SKR certificate not ready")
+var (
+	ErrSkrCertificateNotReady        = errors.New("SKR certificate not ready")
+	ErrSkrWebhookDeploymentNotReady  = errors.New("SKR webhook deployment not ready")
+	ErrSkrWebhookDeploymentInBackoff = errors.New("SKR webhook deployment in backoff state")
+)
 
 const (
-	skrChartFieldOwner = client.FieldOwner(shared.OperatorName)
+	skrChartFieldOwner       = client.FieldOwner(shared.OperatorName)
+	skrWebhookDeploymentName = "skr-webhook"
 )
 
 type WatcherMetrics interface {
@@ -294,3 +300,51 @@ func getWatchers(ctx context.Context, kcpClient client.Client) ([]v1beta2.Watche
 
 	return watcherList.Items, nil
 }
+
+// AssertDeploymentReady reports whether the skr-webhook deployment in shared.DefaultRemoteNamespace
+// has a non-zero number of ready replicas.
+//
+// It returns nil when the deployment is ready. Otherwise it returns an error wrapping
+// ErrSkrWebhookDeploymentInBackoff if a container of a pod labelled app=skr-webhook is waiting in
+// CrashLoopBackOff or ImagePullBackOff, or an error wrapping ErrSkrWebhookDeploymentNotReady if not.
+// Failures to fetch the deployment or to list the pods are returned wrapped.
+func AssertDeploymentReady(ctx context.Context, skrClient client.Reader) error {
+	deployment := apiappsv1.Deployment{}
+	deploymentKey := client.ObjectKey{
+		Name:      skrWebhookDeploymentName,
+		Namespace: shared.DefaultRemoteNamespace,
+	}
+	if err := skrClient.Get(ctx, deploymentKey, &deployment); err != nil {
+		return fmt.Errorf("failed to get skr-webhook deployment: %w", err)
+	}
+	if deployment.Status.ReadyReplicas != 0 {
+		return nil
+	}
+
+	// The pods are only needed to tell a backoff apart from a deployment that is merely not ready,
+	// so they are listed only once the deployment is known to be not ready.
+	podList := &apicorev1.PodList{}
+	err := skrClient.List(ctx, podList, client.InNamespace(shared.DefaultRemoteNamespace),
+		client.MatchingLabels{"app": skrWebhookDeploymentName})
+	if err != nil {
+		return fmt.Errorf("failed to list pods: %w", err)
+	}
+
+	// Index instead of ranging by value to avoid copying each Pod and ContainerStatus.
+	for i := range podList.Items {
+		pod := &podList.Items[i]
+		for j := range pod.Status.ContainerStatuses {
+			waiting := pod.Status.ContainerStatuses[j].State.Waiting
+			if waiting == nil {
+				continue
+			}
+			if waiting.Reason == "CrashLoopBackOff" || waiting.Reason == "ImagePullBackOff" {
+				return fmt.Errorf("%w: pod %s/%s in backoff state (%s)", ErrSkrWebhookDeploymentInBackoff,
+					pod.Namespace, pod.Name, waiting.Reason)
+			}
+		}
+	}
+
+	return fmt.Errorf("%w: deployment %s/%s is not in Ready state", ErrSkrWebhookDeploymentNotReady,
+		deployment.Namespace, deployment.Name)
+}
diff --git a/pkg/watcher/skr_webhook_manifest_manager_test.go b/pkg/watcher/skr_webhook_manifest_manager_test.go
new file mode 100644
index 0000000000..2696eb2ad8
--- /dev/null
+++ b/pkg/watcher/skr_webhook_manifest_manager_test.go
@@ -0,0 +1,129 @@
+package watcher_test
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	apiappsv1 "k8s.io/api/apps/v1"
+	apicorev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	"github.com/kyma-project/lifecycle-manager/pkg/watcher"
+)
+
+func TestAssertDeploymentReady_ReturnsNoError_WhenDeploymentReady(t *testing.T) {
+	readyDeployment := &apiappsv1.Deployment{
+		Status: apiappsv1.DeploymentStatus{
+			ReadyReplicas: 1,
+		},
+	}
+	getFunc := func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+		deployment, _ := obj.(*apiappsv1.Deployment)
+		*deployment = *readyDeployment
+		return nil
+	}
+	listFunc := func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+		return errors.New("pods must not be listed for a ready deployment")
+	}
+	mockClnt := &mockClient{getFunc: getFunc, listFunc: listFunc}
+	ctx := t.Context()
+
+	err := watcher.AssertDeploymentReady(ctx, mockClnt)
+	require.NoError(t, err)
+}
+
+func TestAssertDeploymentReady_ReturnsError_WhenDeploymentNotReady(t *testing.T) {
+	notReadyDeployment := &apiappsv1.Deployment{
+		Status: apiappsv1.DeploymentStatus{
+			ReadyReplicas: 0,
+		},
+	}
+	getFunc := func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+		deployment, _ := obj.(*apiappsv1.Deployment)
+		*deployment = *notReadyDeployment
+		return nil
+	}
+	listFunc := func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+		return nil
+	}
+	mockClnt := &mockClient{getFunc: getFunc, listFunc: listFunc}
+	ctx := t.Context()
+
+	err := watcher.AssertDeploymentReady(ctx, mockClnt)
+	require.Error(t, err)
+	require.ErrorIs(t, err, watcher.ErrSkrWebhookDeploymentNotReady)
+}
+
+func TestAssertDeploymentReady_ReturnsError_WhenClientReturnsError(t *testing.T) {
+	unexpectedError := errors.New("unexpected error")
+	failingGetFunc := func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+		return unexpectedError
+	}
+	listFunc := func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+		return nil
+	}
+	mockClnt := &mockClient{getFunc: failingGetFunc, listFunc: listFunc}
+	ctx := t.Context()
+
+	err := watcher.AssertDeploymentReady(ctx, mockClnt)
+	require.Error(t, err)
+	require.ErrorIs(t, err, unexpectedError)
+}
+
+func TestAssertDeploymentReady_ReturnsError_WhenDeploymentInBackoff(t *testing.T) {
+	deployment := &apiappsv1.Deployment{
+		Status: apiappsv1.DeploymentStatus{
+			ReadyReplicas: 0,
+		},
+	}
+	podList := &apicorev1.PodList{
+		Items: []apicorev1.Pod{{
+			Status: apicorev1.PodStatus{
+				ContainerStatuses: []apicorev1.ContainerStatus{{
+					State: apicorev1.ContainerState{
+						Waiting: &apicorev1.ContainerStateWaiting{
+							Reason: "CrashLoopBackOff",
+						},
+					},
+				}},
+			},
+		}},
+	}
+	getFunc := func(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+		deploymentObj, ok := obj.(*apiappsv1.Deployment)
+		if ok {
+			*deploymentObj = *deployment
+		}
+		return nil
+	}
+	listFunc := func(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+		podListObj, ok := list.(*apicorev1.PodList)
+		if ok {
+			*podListObj = *podList
+		}
+		return nil
+	}
+	mockClnt := &mockClient{getFunc: getFunc, listFunc: listFunc}
+	ctx := t.Context()
+
+	err := watcher.AssertDeploymentReady(ctx, mockClnt)
+	require.Error(t, err)
+	require.ErrorIs(t, err, watcher.ErrSkrWebhookDeploymentInBackoff)
+}
+
+// mockClient is a test stub whose Get and List delegate to the configured funcs.
+
+type mockClient struct {
+	getFunc  func(context.Context, client.ObjectKey, client.Object, ...client.GetOption) error
+	listFunc func(context.Context, client.ObjectList, ...client.ListOption) error
+}
+
+func (m *mockClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error {
+	return m.getFunc(ctx, key, obj, opts...)
+}
+
+func (m *mockClient) List(ctx context.Context, list client.ObjectList, opts ...client.ListOption) error {
+	return m.listFunc(ctx, list, opts...)
+}
diff --git a/unit-test-coverage.yaml b/unit-test-coverage.yaml
index 8a35873356..d540df6643 100644
--- a/unit-test-coverage.yaml
+++ b/unit-test-coverage.yaml
@@ -30,6 +30,7 @@ packages:
   pkg/module/sync: 10
   pkg/templatelookup: 87
   pkg/templatelookup/moduletemplateinfolookup: 98
+  pkg/watcher: 10
   pkg/watcher/skr_webhook_resources: 82
   pkg/watcher/certificate: 100
   pkg/watcher/certificate/certmanager: 100