From 4754f04cb24b606994b8cb29c2098b41ef745992 Mon Sep 17 00:00:00 2001 From: Noah Kantrowitz Date: Mon, 28 Dec 2020 12:08:42 -0800 Subject: [PATCH 1/5] For scaling deployments in particular, use a direct object get rather than the /scale API so that it can use the informer cache for better performance. This is annoying as a special case but is so common and improves performance so much that I think it's worthwhile to include. Another option would be to majorly increase the QPS rate limit on the scaling API client however that would also increase kube-apiserver load while watches/informers are generally much less impactful. Signed-off-by: Noah Kantrowitz --- controllers/scaledobject_controller.go | 1 + pkg/scaling/executor/scale_scaledobjects.go | 71 +++++++++++++++------ 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/controllers/scaledobject_controller.go b/controllers/scaledobject_controller.go index 3a249ac57df..e39cf3abb75 100644 --- a/controllers/scaledobject_controller.go +++ b/controllers/scaledobject_controller.go @@ -56,6 +56,7 @@ type ScaledObjectReconciler struct { // SetupWithManager initializes the ScaledObjectReconciler instance and starts a new controller managed by the passed Manager instance. func (r *ScaledObjectReconciler) SetupWithManager(mgr ctrl.Manager) error { // create Discovery clientset + // TODO If we need to increase the QPS of scaling API calls, copy and tweak this RESTConfig. clientset, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) if err != nil { r.Log.Error(err, "Not able to create Discovery clientset") diff --git a/pkg/scaling/executor/scale_scaledobjects.go b/pkg/scaling/executor/scale_scaledobjects.go index 8e2f558430a..4be3cbbbbed 100644 --- a/pkg/scaling/executor/scale_scaledobjects.go +++ b/pkg/scaling/executor/scale_scaledobjects.go @@ -2,11 +2,14 @@ package executor import ( "context" + "strings" "time" "github.com/go-logr/logr" + appsv1 "k8s.io/api/apps/v1" autoscalingv1 "k8s.io/api/autoscaling/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" kedav1alpha1 "github.com/kedacore/keda/v2/api/v1alpha1" ) @@ -16,18 +19,38 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al "scaledObject.Namespace", scaledObject.Namespace, "scaleTarget.Name", scaledObject.Spec.ScaleTargetRef.Name) - currentScale, err := e.getScaleTargetScale(ctx, scaledObject) - if err != nil { - logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") - return + // Get the current replica count. As a special case, Deployments fetch directly from the object so they can use the informer cache + // to reduce API calls. Everything else uses the scale subresource. + var currentScale *autoscalingv1.Scale + var currentReplicas int32 + targetRef := scaledObject.Spec.ScaleTargetRef + // TODO Should this use a more generic runtime.Object approach using scheme.Scheme.New()? + if (targetRef.APIVersion == "" || strings.HasPrefix(targetRef.APIVersion, "apps/")) && + (targetRef.Kind == "" || targetRef.Kind == "Deployment") { + deployment := &appsv1.Deployment{} + err := e.client.Get(ctx, client.ObjectKey{Name: targetRef.Name, Namespace: scaledObject.Namespace}, deployment) + if err != nil { + logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + return + } + currentReplicas = *deployment.Spec.Replicas + } else { + var err error + currentScale, err = e.getScaleTargetScale(ctx, scaledObject) + if err != nil { + logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + return + } + currentReplicas = currentScale.Spec.Replicas } + switch { - case currentScale.Spec.Replicas == 0 && isActive: + case currentReplicas == 0 && isActive: // current replica count is 0, but there is an active trigger. // scale the ScaleTarget up e.scaleFromZero(ctx, logger, scaledObject, currentScale) case !isActive && - currentScale.Spec.Replicas > 0 && + currentReplicas > 0 && (scaledObject.Spec.MinReplicaCount == nil || *scaledObject.Spec.MinReplicaCount == 0): // there are no active triggers, but the ScaleTarget has replicas. // AND @@ -37,14 +60,12 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al e.scaleToZero(ctx, logger, scaledObject, currentScale) case !isActive && scaledObject.Spec.MinReplicaCount != nil && - currentScale.Spec.Replicas < *scaledObject.Spec.MinReplicaCount: + currentReplicas < *scaledObject.Spec.MinReplicaCount: // there are no active triggers // AND // ScaleTarget replicas count is less than minimum replica count specified in ScaledObject // Let's set ScaleTarget replicas count to correct value - currentScale.Spec.Replicas = *scaledObject.Spec.MinReplicaCount - - err := e.updateScaleOnScaleTarget(ctx, scaledObject, currentScale) + _, err := e.updateScaleOnScaleTarget(ctx, scaledObject, currentScale, *scaledObject.Spec.MinReplicaCount) if err == nil { logger.Info("Successfully set ScaleTarget replicas count to ScaledObject minReplicaCount", "ScaleTarget.Replicas", currentScale.Spec.Replicas) @@ -93,8 +114,7 @@ func (e *scaleExecutor) scaleToZero(ctx context.Context, logger logr.Logger, sca if scaledObject.Status.LastActiveTime == nil || scaledObject.Status.LastActiveTime.Add(cooldownPeriod).Before(time.Now()) { // or last time a trigger was active was > cooldown period, so scale down. - scale.Spec.Replicas = 0 - err := e.updateScaleOnScaleTarget(ctx, scaledObject, scale) + _, err := e.updateScaleOnScaleTarget(ctx, scaledObject, scale, 0) if err == nil { logger.Info("Successfully scaled ScaleTarget to 0 replicas") if err := e.setActiveCondition(ctx, logger, scaledObject, metav1.ConditionFalse, "ScalerNotActive", "Scaling is not performed because triggers are not active"); err != nil { @@ -118,19 +138,19 @@ func (e *scaleExecutor) scaleToZero(ctx context.Context, logger logr.Logger, sca } func (e *scaleExecutor) scaleFromZero(ctx context.Context, logger logr.Logger, scaledObject *kedav1alpha1.ScaledObject, scale *autoscalingv1.Scale) { - currentReplicas := scale.Spec.Replicas + var replicas int32 if scaledObject.Spec.MinReplicaCount != nil && *scaledObject.Spec.MinReplicaCount > 0 { - scale.Spec.Replicas = *scaledObject.Spec.MinReplicaCount + replicas = *scaledObject.Spec.MinReplicaCount } else { - scale.Spec.Replicas = 1 + replicas = 1 } - err := e.updateScaleOnScaleTarget(ctx, scaledObject, scale) + currentReplicas, err := e.updateScaleOnScaleTarget(ctx, scaledObject, scale, replicas) if err == nil { logger.Info("Successfully updated ScaleTarget", "Original Replicas Count", currentReplicas, - "New Replicas Count", scale.Spec.Replicas) + "New Replicas Count", replicas) // Scale was successful. Update lastScaleTime and lastActiveTime on the scaledObject if err := e.updateLastActiveTime(ctx, logger, scaledObject); err != nil { @@ -144,7 +164,20 @@ func (e *scaleExecutor) getScaleTargetScale(ctx context.Context, scaledObject *k return (*e.scaleClient).Scales(scaledObject.Namespace).Get(ctx, scaledObject.Status.ScaleTargetGVKR.GroupResource(), scaledObject.Spec.ScaleTargetRef.Name, metav1.GetOptions{}) } -func (e *scaleExecutor) updateScaleOnScaleTarget(ctx context.Context, scaledObject *kedav1alpha1.ScaledObject, scale *autoscalingv1.Scale) error { +func (e *scaleExecutor) updateScaleOnScaleTarget(ctx context.Context, scaledObject *kedav1alpha1.ScaledObject, scale *autoscalingv1.Scale, replicas int32) (int32, error) { + if scale == nil { + // Wasn't retrieved earlier, grab it now. + var err error + scale, err = e.getScaleTargetScale(ctx, scaledObject) + if err != nil { + return -1, err + } + } + + // Update with requested repliacs. + currentReplicas := scale.Spec.Replicas + scale.Spec.Replicas = replicas + _, err := (*e.scaleClient).Scales(scaledObject.Namespace).Update(ctx, scaledObject.Status.ScaleTargetGVKR.GroupResource(), scale, metav1.UpdateOptions{}) - return err + return currentReplicas, err } From 2288c7ee54ecb25c1f7874c75bdf4dcce4165831 Mon Sep 17 00:00:00 2001 From: Noah Kantrowitz Date: Mon, 28 Dec 2020 12:31:11 -0800 Subject: [PATCH 2/5] Update changelog. Signed-off-by: Noah Kantrowitz --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8a3d1d1b8a..807e0a08db5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ - Mask password in postgres scaler auto generated metricName. ([PR #1381](https://github.com/kedacore/keda/pull/1381)) - Bug fix for pending jobs in ScaledJob's accurateScalingStrategy . ([#1323](https://github.com/kedacore/keda/issues/1323)) - Fix memory leak because of unclosed scalers. ([#1413](https://github.com/kedacore/keda/issues/1413)) +- Improve performance when fetching current scaling information on Deployments ([#1458](https://github.com/kedacore/keda/pull/1458)) ### Breaking Changes From 4eb4cb038fd014c1ed9034ebcaafc1095330ca54 Mon Sep 17 00:00:00 2001 From: Noah Kantrowitz Date: Mon, 4 Jan 2021 13:47:34 -0800 Subject: [PATCH 3/5] Use the already-normalized GVKR data so less weird string parsing. Also adds support for StatefulSets for symmetry. Signed-off-by: Noah Kantrowitz --- pkg/scaling/executor/scale_scaledobjects.go | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pkg/scaling/executor/scale_scaledobjects.go b/pkg/scaling/executor/scale_scaledobjects.go index 4be3cbbbbed..702bd241064 100644 --- a/pkg/scaling/executor/scale_scaledobjects.go +++ b/pkg/scaling/executor/scale_scaledobjects.go @@ -2,7 +2,6 @@ package executor import ( "context" - "strings" "time" "github.com/go-logr/logr" @@ -23,18 +22,25 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al // to reduce API calls. Everything else uses the scale subresource. var currentScale *autoscalingv1.Scale var currentReplicas int32 - targetRef := scaledObject.Spec.ScaleTargetRef - // TODO Should this use a more generic runtime.Object approach using scheme.Scheme.New()? - if (targetRef.APIVersion == "" || strings.HasPrefix(targetRef.APIVersion, "apps/")) && - (targetRef.Kind == "" || targetRef.Kind == "Deployment") { + targetGVKR := scaledObject.Status.ScaleTargetGVKR + switch { + case targetGVKR.Group == "apps" && targetGVKR.Kind == "Deployment": deployment := &appsv1.Deployment{} - err := e.client.Get(ctx, client.ObjectKey{Name: targetRef.Name, Namespace: scaledObject.Namespace}, deployment) + err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, deployment) if err != nil { logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") return } currentReplicas = *deployment.Spec.Replicas - } else { + case targetGVKR.Group == "apps" && targetGVKR.Kind == "StatefulSet": + statefulSet := &appsv1.StatefulSet{} + err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, statefulSet) + if err != nil { + logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + return + } + currentReplicas = *statefulSet.Spec.Replicas + default: var err error currentScale, err = e.getScaleTargetScale(ctx, scaledObject) if err != nil { From abcf8bb57863fb34fb4337c93dfcee1993da39a7 Mon Sep 17 00:00:00 2001 From: Noah Kantrowitz Date: Tue, 5 Jan 2021 12:26:20 -0800 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Zbynek Roubalik <726523+zroubalik@users.noreply.github.com> Signed-off-by: Noah Kantrowitz --- pkg/scaling/executor/scale_scaledobjects.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/scaling/executor/scale_scaledobjects.go b/pkg/scaling/executor/scale_scaledobjects.go index 702bd241064..8bc4c2e0ea0 100644 --- a/pkg/scaling/executor/scale_scaledobjects.go +++ b/pkg/scaling/executor/scale_scaledobjects.go @@ -18,7 +18,7 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al "scaledObject.Namespace", scaledObject.Namespace, "scaleTarget.Name", scaledObject.Spec.ScaleTargetRef.Name) - // Get the current replica count. As a special case, Deployments fetch directly from the object so they can use the informer cache + // Get the current replica count. As a special case, Deployments and StatefulSets fetch directly from the object so they can use the informer cache // to reduce API calls. Everything else uses the scale subresource. var currentScale *autoscalingv1.Scale var currentReplicas int32 @@ -28,7 +28,7 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al deployment := &appsv1.Deployment{} err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, deployment) if err != nil { - logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + logger.Error(err, "Error getting information on the current Scale (ie. replicas count) on the scaleTarget") return } currentReplicas = *deployment.Spec.Replicas @@ -36,7 +36,7 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al statefulSet := &appsv1.StatefulSet{} err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, statefulSet) if err != nil { - logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + logger.Error(err, "Error getting information on the current Scale (ie. replicas count) on the scaleTarget") return } currentReplicas = *statefulSet.Spec.Replicas @@ -44,7 +44,7 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al var err error currentScale, err = e.getScaleTargetScale(ctx, scaledObject) if err != nil { - logger.Error(err, "Error getting information on the current Scale (ie. replias count) on the scaleTarget") + logger.Error(err, "Error getting information on the current Scale (ie. replicas count) on the scaleTarget") return } currentReplicas = currentScale.Spec.Replicas From ff66687240abd66989782633ac92026b40fb9684 Mon Sep 17 00:00:00 2001 From: Noah Kantrowitz Date: Wed, 6 Jan 2021 09:04:54 -0800 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Zbynek Roubalik <726523+zroubalik@users.noreply.github.com> Signed-off-by: Noah Kantrowitz --- pkg/scaling/executor/scale_scaledobjects.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/scaling/executor/scale_scaledobjects.go b/pkg/scaling/executor/scale_scaledobjects.go index 8bc4c2e0ea0..6e4a5c1cc4b 100644 --- a/pkg/scaling/executor/scale_scaledobjects.go +++ b/pkg/scaling/executor/scale_scaledobjects.go @@ -22,11 +22,12 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al // to reduce API calls. Everything else uses the scale subresource. var currentScale *autoscalingv1.Scale var currentReplicas int32 + targetName := scaledObject.Spec.ScaleTargetRef.Name targetGVKR := scaledObject.Status.ScaleTargetGVKR switch { case targetGVKR.Group == "apps" && targetGVKR.Kind == "Deployment": deployment := &appsv1.Deployment{} - err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, deployment) + err := e.client.Get(ctx, client.ObjectKey{Name: targetName, Namespace: scaledObject.Namespace}, deployment) if err != nil { logger.Error(err, "Error getting information on the current Scale (ie. replicas count) on the scaleTarget") return @@ -34,7 +35,7 @@ func (e *scaleExecutor) RequestScale(ctx context.Context, scaledObject *kedav1al currentReplicas = *deployment.Spec.Replicas case targetGVKR.Group == "apps" && targetGVKR.Kind == "StatefulSet": statefulSet := &appsv1.StatefulSet{} - err := e.client.Get(ctx, client.ObjectKey{Name: targetGVKR.Resource, Namespace: scaledObject.Namespace}, statefulSet) + err := e.client.Get(ctx, client.ObjectKey{Name: targetName, Namespace: scaledObject.Namespace}, statefulSet) if err != nil { logger.Error(err, "Error getting information on the current Scale (ie. replicas count) on the scaleTarget") return