Skip to content

Commit 90bccb0

Browse files
authored
Merge pull request #3969 from brett-elliott/metrics2
Log failed scale up metric based on string value of AutoscalerErrorType.
2 parents c2c5eaa + 013fa19 commit 90bccb0

File tree

4 files changed

+23
-11
lines changed

4 files changed

+23
-11
lines changed

cluster-autoscaler/core/scale_up.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -666,12 +666,8 @@ func executeScaleUp(context *context.AutoscalingContext, clusterStateRegistry *c
666666
increase := info.NewSize - info.CurrentSize
667667
if err := info.Group.IncreaseSize(increase); err != nil {
668668
context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err)
669-
reason := metrics.CloudProviderError
670669
aerr := errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to increase node group size: %v", err)
671-
if aerr.Type() == errors.AuthorizationError {
672-
reason = metrics.AuthorizationError
673-
}
674-
clusterStateRegistry.RegisterFailedScaleUp(info.Group, reason, now)
670+
clusterStateRegistry.RegisterFailedScaleUp(info.Group, metrics.FailedScaleUpReason(string(aerr.Type())), now)
675671
return aerr
676672
}
677673
clusterStateRegistry.RegisterOrUpdateScaleUp(

cluster-autoscaler/core/scale_up_test.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ package core
1818

1919
import (
2020
"fmt"
21+
"net/http"
22+
"net/http/httptest"
2123
"regexp"
2224
"strings"
2325
"testing"
@@ -30,12 +32,14 @@ import (
3032
"k8s.io/autoscaler/cluster-autoscaler/config"
3133
"k8s.io/autoscaler/cluster-autoscaler/core/utils"
3234
"k8s.io/autoscaler/cluster-autoscaler/estimator"
35+
"k8s.io/autoscaler/cluster-autoscaler/metrics"
3336
"k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset"
3437
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
3538
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
3639
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
3740
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
3841
kube_record "k8s.io/client-go/tools/record"
42+
"k8s.io/component-base/metrics/legacyregistry"
3943

4044
appsv1 "k8s.io/api/apps/v1"
4145
apiv1 "k8s.io/api/core/v1"
@@ -974,17 +978,33 @@ func TestCheckScaleUpDeltaWithinLimits(t *testing.T) {
974978
}
975979

976980
func TestAuthError(t *testing.T) {
981+
metrics.RegisterAll()
977982
context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, nil, nil, nil)
978983
assert.NoError(t, err)
979984

980985
nodeGroup := &mockprovider.NodeGroup{}
981986
info := nodegroupset.ScaleUpInfo{Group: nodeGroup}
982987
nodeGroup.On("Id").Return("A")
983-
nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AuthorizationError, ""))
988+
nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AutoscalerErrorType("abcd"), ""))
984989

985990
clusterStateRegistry := clusterstate.NewClusterStateRegistry(nil, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff())
986991

987992
aerr := executeScaleUp(&context, clusterStateRegistry, info, "", time.Now())
988993
assert.Error(t, aerr)
989-
assert.Equal(t, errors.AuthorizationError, aerr.Type())
994+
995+
req, err := http.NewRequest("GET", "/", nil)
996+
if err != nil {
997+
t.Fatal(err)
998+
}
999+
rr := httptest.NewRecorder()
1000+
handler := http.HandlerFunc(legacyregistry.Handler().ServeHTTP)
1001+
handler.ServeHTTP(rr, req)
1002+
1003+
// Check that the status code is what we expect.
1004+
if status := rr.Code; status != http.StatusOK {
1005+
t.Errorf("handler returned wrong status code: got %v want %v",
1006+
status, http.StatusOK)
1007+
}
1008+
// Check that the failed scale up reason is set correctly.
1009+
assert.Contains(t, rr.Body.String(), "cluster_autoscaler_failed_scale_ups_total{reason=\"abcd\"} 1")
9901010
}

cluster-autoscaler/metrics/metrics.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ const (
6565
APIError FailedScaleUpReason = "apiCallError"
6666
// Timeout was encountered when trying to scale-up
6767
Timeout FailedScaleUpReason = "timeout"
68-
// AuthorizationError is an authorization error.
69-
AuthorizationError FailedScaleUpReason = "authorizationError"
7068

7169
// autoscaledGroup is managed by CA
7270
autoscaledGroup NodeGroupType = "autoscaled"

cluster-autoscaler/utils/errors/errors.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ const (
6161
// NodeGroupDoesNotExistError signifies that a NodeGroup
6262
// does not exist.
6363
NodeGroupDoesNotExistError AutoscalerErrorType = "nodeGroupDoesNotExistError"
64-
// AuthorizationError signifies that an authorization error occurred.
65-
AuthorizationError AutoscalerErrorType = "authorizationError"
6664
)
6765

6866
// NewAutoscalerError returns new autoscaler error with a message constructed from format string

0 commit comments

Comments
 (0)