From 56e4c7f76d58b55992a265ad7950a9926953e7bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helgi=20=C3=9Eormar=20=C3=9Eorbj=C3=B6rnsson?= Date: Wed, 5 May 2021 23:23:26 -0700 Subject: [PATCH 1/5] Centralize configuration using a Configs struct Uses github.com/caarlos0/env/ to read ENV configuration, set type rules and few other helpers. Moves from vars and const's to passing in values consistently via functions, making testing more predictable and the flow a tad bit easier to understand. ROLLER_CHECK_DELAY is now deprecated in favor of ROLLER_INTERVAL to allow for stronger type management. This moves from an int to time.Duration, 30 vs 30s (or 30m, 30d, whatever time.ParseDuration can handle). ROLLER_CHECK_DELAY still works and is auto applied as ROLLER_INTERVAL in seconds if set. Brings in github.com/stretchr/testify as a direct depdency (previously indirect), helping with the new config testing. Further enhancements can be done in future commits to bring log handling more centralized and moving more things to structs to make assigning value easier --- README.md | 23 ++++++++-------- aws.go | 14 +++++----- aws_internal_test.go | 37 ++++++++++++------------- configs.go | 16 +++++++++++ go.mod | 9 ++++--- go.sum | 38 +++++++------------------- kubernetes.go | 22 +++++++-------- main.go | 53 +++++++++++++----------------------- main_test.go | 60 ++++++++++++++++++++++++++++------------- original_desired.go | 15 ++++------- roller.go | 24 ++++++++--------- roller_internal_test.go | 42 ++++++++++++++++++----------- 12 files changed, 182 insertions(+), 171 deletions(-) create mode 100644 configs.go diff --git a/README.md b/README.md index bc8bcb8..3760104 100644 --- a/README.md +++ b/README.md @@ -226,23 +226,24 @@ Several key areas of potential modification: ## Configuration ASG Roller takes its configuration via environment variables. All environment variables that affect ASG Roller begin with `ROLLER_`. -* `ROLLER_ASG`: comma-separated list of auto-scaling groups that should be managed. -* `ROLLER_KUBERNETES`: If set to `true`, will check if a new node is ready via-a-vis Kubernetes before declaring it "ready", and will drain an old node before eliminating it. Defaults to `true` when running in Kubernetes as a pod, `false` otherwise. -* `ROLLER_IGNORE_DAEMONSETS`: If set to `false`, will not reclaim a node until there are no DaemonSets running on the node; if set to `true` (default), will reclaim node when all regular pods are drained off, but will ignore the presence of DaemonSets, which should be present on every node anyways. Normally, you want this set to `true`, which is the default. -* `ROLLER_DELETE_LOCAL_DATA`: If set to `false` (default), will not reclaim a node until there are no pods with [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) running on the node; if set to `true`, will continue to terminate the pod and delete the local data before reclaiming the node. The default is `false` to maintain backward compatibility. -* `ROLLER_CHECK_DELAY`: Time, in seconds, between checks of ASG status. -* `ROLLER_CAN_INCREASE_MAX`: If set to `true`, will increase the ASG maximum size to accommodate the increase in desired count. If set to `false`, will instead error when desired is higher than max. -* `ROLLER_ORIGINAL_DESIRED_ON_TAG`: If set to `true`, will store the original desired value of the ASG as a tag on the ASG, with the key `aws-asg-roller/OriginalDesired`. This helps maintain state in the situation where the process terminates. -* `ROLLER_VERBOSE`: If set to `true`, will increase verbosity of logs. -* `KUBECONFIG`: Path to kubernetes config file for authenticating to the kubernetes cluster. Required only if `ROLLER_KUBERNETES` is `true` and we are not operating in a kubernetes cluster. +* `ROLLER_ASG` [`string`, required]: comma-separated list of auto-scaling groups that should be managed. +* `ROLLER_KUBERNETES` [`bool`, default: `true`]: If set to `true`, will check if a new node is ready via-a-vis Kubernetes before declaring it "ready", and will drain an old node before eliminating it. Defaults to `true` when running in Kubernetes as a pod, `false` otherwise. +* `ROLLER_IGNORE_DAEMONSETS` [`bool`, default: `true`]: If set to `false`, will not reclaim a node until there are no DaemonSets running on the node; if set to `true` (default), will reclaim node when all regular pods are drained off, but will ignore the presence of DaemonSets, which should be present on every node anyways. Normally, you want this set to `true`. +* `ROLLER_DELETE_LOCAL_DATA` [`bool`, default: `false`]: If set to `false` (default), will not reclaim a node until there are no pods with [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) running on the node; if set to `true`, will continue to terminate the pod and delete the local data before reclaiming the node. The default is `false` to maintain backward compatibility. +* `ROLLER_INTERVAL` [`time.Duration`, default: `30s`]: Time between roller runs. Takes time duration such as 10s, 10m, 10d +* `ROLLER_CHECK_DELAY` [`int`]: Time, in seconds, between checks of ASG status. deprecated, use `ROLLER_INTERVAL`. +* `ROLLER_CAN_INCREASE_MAX` `bool`: If set to `true`, will increase the ASG maximum size to accommodate the increase in desired count. If set to `false`, will instead error when desired is higher than max. +* `ROLLER_ORIGINAL_DESIRED_ON_TAG` [`bool`, default: `false`]: If set to `true`, will store the original desired value of the ASG as a tag on the ASG, with the key `aws-asg-roller/OriginalDesired`. This helps maintain state in the situation where the process terminates. +* `ROLLER_VERBOSE` [`bool`, default: `false`]: If set to `true`, will increase verbosity of logs. +* `KUBECONFIG` [`string`]: Path to kubernetes config file for authenticating to the kubernetes cluster. Required only if `ROLLER_KUBERNETES` is `true` and we are not operating in a kubernetes cluster. ## Interaction with cluster-autoscaler -[cluster-autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) is a tool that commonly used to automatically adjusts the size of the Kubernetes cluster. However, there might be some conflicts (see [#19](https://github.com/deitch/aws-asg-roller/issues/19) for more details) between cluster-autoscaler and aws-asg-roller when they are both trying to schedule the asg. A workaround was implemented in aws-asg-roller by annotating all the managed nodes with `cluster-autoscaler.kubernetes.io/scale-down-disabled` when rolling-update is required. +[cluster-autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) is a tool that commonly used to automatically adjusts the size of the Kubernetes cluster. However, there might be some conflicts (see [#19](https://github.com/deitch/aws-asg-roller/issues/19) for more details) between cluster-autoscaler and aws-asg-roller when they are both trying to schedule the asg. A workaround was implemented in aws-asg-roller by annotating all the managed nodes with `cluster-autoscaler.kubernetes.io/scale-down-disabled` when rolling-update is required. The general flow can be summarized as follow: * Check if any nodes in the asg needs to be updated. -* If there are nodes that needs to be updated, annotate all up-to-date or new nodes with `cluster-autoscaler.kubernetes.io/scale-down-disabled` +* If there are nodes that needs to be updated, annotate all up-to-date or new nodes with `cluster-autoscaler.kubernetes.io/scale-down-disabled` * Update asg to spin up a new node before draining any old nodes. * Sleep and repeat (i.e. annotate new unutilized node to prevent it from being scaled-down). * If all nodes are up-to-date, remove `cluster-autoscaler.kubernetes.io/scale-down-disabled` if any from all the nodes - i.e. normal cluster-autoscaler management resumes. diff --git a/aws.go b/aws.go index 4346dca..2d495f3 100644 --- a/aws.go +++ b/aws.go @@ -12,10 +12,10 @@ import ( "log" ) -func setAsgDesired(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, count int64) error { +func setAsgDesired(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, count int64, canIncreaseMax, verbose bool) error { if count > *asg.MaxSize { if canIncreaseMax { - err := setAsgMax(svc, asg, count) + err := setAsgMax(svc, asg, count, verbose) if err != nil { return err } @@ -43,9 +43,9 @@ func setAsgDesired(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, default: return fmt.Errorf("%s - unexpected and unknown AWS error: %v", errMsg, aerr.Error()) } - } else { - return fmt.Errorf("%s - unexpected and unknown non-AWS error: %v", errMsg, err.Error()) } + + return fmt.Errorf("%s - unexpected and unknown non-AWS error: %v", errMsg, err.Error()) } if verbose { log.Printf("increased ASG %s desired count to %d", *asg.AutoScalingGroupName, count) @@ -53,7 +53,7 @@ func setAsgDesired(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, return nil } -func setAsgMax(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, count int64) error { +func setAsgMax(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, count int64, verbose bool) error { if verbose { log.Printf("increasing ASG %s max size to %d to accommodate desired count", *asg.AutoScalingGroupName, count) } @@ -72,9 +72,9 @@ func setAsgMax(svc autoscalingiface.AutoScalingAPI, asg *autoscaling.Group, coun default: return fmt.Errorf("%s - unexpected and unknown AWS error: %v", errMsg, aerr.Error()) } - } else { - return fmt.Errorf("%s - unexpected and unknown non-AWS error: %v", errMsg, err.Error()) } + + return fmt.Errorf("%s - unexpected and unknown non-AWS error: %v", errMsg, err.Error()) } if verbose { log.Printf("increased ASG %s max size to %d to accommodate desired count", *asg.AutoScalingGroupName, count) diff --git a/aws_internal_test.go b/aws_internal_test.go index fb494bb..bdff553 100644 --- a/aws_internal_test.go +++ b/aws_internal_test.go @@ -309,24 +309,24 @@ func TestAwsSetAsgDesired(t *testing.T) { canIncreaseMax bool setErr error err error + verbose bool }{ - {3, 3, true, nil, nil}, - {2, 2, true, nil, nil}, - {15, 15, true, awserr.New(autoscaling.ErrCodeResourceContentionFault, "", nil), fmt.Errorf("unable to increase ASG mygroup desired count to 15 - ResourceContention")}, - {1, 1, true, awserr.New("testabc", "", nil), fmt.Errorf("unable to increase ASG mygroup desired count to 1 - unexpected and unknown AWS error")}, - {25, 25, true, fmt.Errorf("testabc"), fmt.Errorf("unable to increase ASG mygroup desired count to 25 - unexpected and unknown non-AWS error")}, - {31, 30, false, nil, fmt.Errorf("unable to increase ASG mygroup desired size to 31 as greater than max size 30")}, - {31, 30, true, nil, nil}, + {3, 3, true, nil, nil, false}, + {2, 2, true, nil, nil, false}, + {15, 15, true, awserr.New(autoscaling.ErrCodeResourceContentionFault, "", nil), fmt.Errorf("unable to increase ASG mygroup desired count to 15 - ResourceContention"), false}, + {1, 1, true, awserr.New("testabc", "", nil), fmt.Errorf("unable to increase ASG mygroup desired count to 1 - unexpected and unknown AWS error"), false}, + {25, 25, true, fmt.Errorf("testabc"), fmt.Errorf("unable to increase ASG mygroup desired count to 25 - unexpected and unknown non-AWS error"), false}, + {31, 30, false, nil, fmt.Errorf("unable to increase ASG mygroup desired size to 31 as greater than max size 30"), false}, + {31, 30, true, nil, nil, false}, } for i, tt := range tests { asg := &autoscaling.Group{ AutoScalingGroupName: &groupName, MaxSize: &tt.max, } - canIncreaseMax = tt.canIncreaseMax err := setAsgDesired(&mockAsgSvc{ err: tt.setErr, - }, asg, tt.desired) + }, asg, tt.desired, tt.canIncreaseMax, tt.verbose) switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): t.Errorf("%d: Mismatched error, actual then expected", i) @@ -339,15 +339,16 @@ func TestAwsSetAsgDesired(t *testing.T) { func TestAwsSetAsgMax(t *testing.T) { groupName := "mygroup" tests := []struct { - max int64 - setErr error - err error + max int64 + setErr error + err error + verbose bool }{ - {3, nil, nil}, - {2, nil, nil}, - {15, awserr.New(autoscaling.ErrCodeResourceContentionFault, "", nil), fmt.Errorf("unable to increase ASG mygroup max size to 15 - ResourceContention")}, - {1, awserr.New("testabc", "", nil), fmt.Errorf("unable to increase ASG mygroup max size to 1 - unexpected and unknown AWS error: testabc")}, - {25, fmt.Errorf("testabc"), fmt.Errorf("unable to increase ASG mygroup max size to 25 - unexpected and unknown non-AWS error: testabc")}, + {3, nil, nil, false}, + {2, nil, nil, false}, + {15, awserr.New(autoscaling.ErrCodeResourceContentionFault, "", nil), fmt.Errorf("unable to increase ASG mygroup max size to 15 - ResourceContention"), false}, + {1, awserr.New("testabc", "", nil), fmt.Errorf("unable to increase ASG mygroup max size to 1 - unexpected and unknown AWS error: testabc"), false}, + {25, fmt.Errorf("testabc"), fmt.Errorf("unable to increase ASG mygroup max size to 25 - unexpected and unknown non-AWS error: testabc"), false}, } for i, tt := range tests { asg := &autoscaling.Group{ @@ -355,7 +356,7 @@ func TestAwsSetAsgMax(t *testing.T) { } err := setAsgMax(&mockAsgSvc{ err: tt.setErr, - }, asg, tt.max) + }, asg, tt.max, tt.verbose) switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): t.Errorf("%d: Mismatched error, actual then expected", i) diff --git a/configs.go b/configs.go new file mode 100644 index 0000000..2401489 --- /dev/null +++ b/configs.go @@ -0,0 +1,16 @@ +package main + +import "time" + +// Configs struct deals with env configuration +type Configs struct { + Interval time.Duration `env:"ROLLER_INTERVAL" envDefault:"30s"` + CheckDelay int `env:"ROLLER_CHECK_DELAY" envDefault:"30"` + IncreaseMax bool `env:"ROLLER_CAN_INCREASE_MAX" envDefault:"false"` + IgnoreDaemonSets bool `env:"ROLLER_IGNORE_DAEMONSETS" envDefault:"false"` + DeleteLocalData bool `env:"ROLLER_DELETE_LOCAL_DATA" envDefault:"false"` + OriginalDesiredOnTag bool `env:"ROLLER_ORIGINAL_DESIRED_ON_TAG" envDefault:"false"` + ASGS []string `env:"ROLLER_ASG,required" envSeparator:","` + KubernetesEnabled bool `env:"ROLLER_KUBERNETES" envDefault:"true"` + Verbose bool `env:"ROLLER_VERBOSE" envDefault:"false"` +} diff --git a/go.mod b/go.mod index 57a7917..81f8c6b 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/deitch/aws-asg-roller go 1.12 require ( - github.com/alexkohler/nakedret v1.0.0 // indirect github.com/aws/aws-sdk-go v1.21.8 + github.com/caarlos0/env/v6 v6.5.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-log/log v0.2.0 // indirect github.com/gogo/protobuf v0.0.0-20170330071051-c0656edd0d9e // indirect @@ -13,19 +13,22 @@ require ( github.com/googleapis/gnostic v0.0.0-20170729233727-0c5108395e2d // indirect github.com/gregjones/httpcache v0.0.0-20170728041850-787624de3eb7 // indirect github.com/imdario/mergo v0.3.6 // indirect + github.com/kr/pretty v0.1.0 // indirect github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect github.com/openshift/kubernetes-drain v0.0.0-20180831174519-c2e51be1758e github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/spf13/pflag v1.0.3 // indirect - github.com/stretchr/testify v1.3.0 // indirect + github.com/stretchr/testify v1.7.0 + golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 // indirect + golang.org/x/net v0.0.0-20200226121028-0de0cce0169b // indirect golang.org/x/oauth2 v0.0.0-20170412232759-a6bd8cefa181 // indirect golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e // indirect golang.org/x/time v0.0.0-20161028155119-f51c12702a4d // indirect google.golang.org/appengine v1.3.0 // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/inf.v0 v0.9.0 // indirect k8s.io/api v0.0.0-20181004124137-fd83cbc87e76 k8s.io/apimachinery v0.0.0-20180913025736-6dd46049f395 k8s.io/client-go v9.0.0+incompatible k8s.io/kube-openapi v0.0.0-20190426233423-c5d3b0f4bee0 // indirect - mvdan.cc/unparam v0.0.0-20200314162735-0ac8026f7d06 // indirect ) diff --git a/go.sum b/go.sum index 80c396c..b70422a 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,10 @@ github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/alexkohler/nakedret v1.0.0 h1:S/bzOFhZHYUJp6qPmdXdFHS5nlWGFmLmoc8QOydvotE= -github.com/alexkohler/nakedret v1.0.0/go.mod h1:tfDQbtPt67HhBK/6P0yNktIX7peCxfOp0jO9007DrLE= -github.com/aws/aws-sdk-go v1.15.73 h1:Xzo/nSFgDfRNHRkXc03nT9389YFFaxqsy9clPwAoff0= -github.com/aws/aws-sdk-go v1.15.73/go.mod h1:E3/ieXAlvM0XWO57iftYVDLLvQ824smPP3ATZkfNZeM= github.com/aws/aws-sdk-go v1.21.8 h1:Lv6hW2twBhC6mGZAuWtqplEpIIqtVctJg02sE7Qn0Zw= github.com/aws/aws-sdk-go v1.21.8/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= +github.com/caarlos0/env/v6 v6.5.0 h1:f4C7ZQwm0nRFo8vETCQviLUOtOlOwsOhgc/QXp0zrTM= +github.com/caarlos0/env/v6 v6.5.0/go.mod h1:5ZqhjfyF261xGkANuSuMQ1FeA9ikA3wzDY64wSd9k8k= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -42,8 +40,10 @@ github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5i github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3 h1:/UewZcckqhvnnS0C6r3Sher2hSEbVmM6Ogpcjen08+Y= github.com/json-iterator/go v0.0.0-20180612202835-f2b4162afba3/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= @@ -63,38 +63,26 @@ github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.5.2/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v0.0.0-20151208002404-e3a8ff8ce365/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/tsenart/deadcode v0.0.0-20160724212837-210d2dc333e9 h1:vY5WqiEon0ZSTGM3ayVVi+twaHKHDFUVloaQ/wug9/c= -github.com/tsenart/deadcode v0.0.0-20160724212837-210d2dc333e9/go.mod h1:q+QjxYvZ+fpjMXqs+XEriussHjSYqeXVnAdSV1tkMYk= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -golang.org/x/crypto v0.0.0-20180808211826-de0752318171 h1:vYogbvSFj2YXcjQxFHu/rASSOt9sLytpCaSkiwQ135I= -golang.org/x/crypto v0.0.0-20180808211826-de0752318171/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180724234803-3673e40ba225 h1:kNX+jCowfMYzvlSvJu5pQWEmyWFrBXJ3PBy10xKMXK8= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20170412232759-a6bd8cefa181 h1:/4OaQ4bC66Oq9JDhUnxTjBGt8XBhDuwgMRXHgvfcCUY= golang.org/x/oauth2 v0.0.0-20170412232759-a6bd8cefa181/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8 h1:YoY1wS6JYVRpIfFngRf2HHo9R9dAne3xbkGOQ5rJXjU= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -103,24 +91,18 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d h1:TnM+PKb3ylGmZvyPXmo9m/wktg7Jn/a/fNmr33HSj8g= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09 h1:6Cq5LXQ/D2J5E7sYJemWSQApczOzY1rxSp8TWloyxIY= golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200313205530-4303120df7d8 h1:gkI/wGGwpcG5W4hLCzZNGxA4wzWBGGDStRI1MrjDl2Q= -golang.org/x/tools v0.0.0-20200313205530-4303120df7d8/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.3.0 h1:FBSsiFRMz3LBeXIomRnVzrQwSDj4ibvcRexLG0LZGQk= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/inf.v0 v0.9.0 h1:3zYtXIO92bvsdS3ggAdA8Gb4Azj0YU+TVY1uGYNFA8o= gopkg.in/inf.v0 v0.9.0/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/yaml.v2 v2.2.1 h1:mUhvW9EsL+naU5Q3cakzfE91YhliOondGd6ZrsDBHQE= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= k8s.io/api v0.0.0-20181004124137-fd83cbc87e76 h1:cGc6jt7tNK7a2WfgNKjxjoU/UXXr9Q7JTqvCupZ+6+Y= k8s.io/api v0.0.0-20181004124137-fd83cbc87e76/go.mod h1:iuAfoD4hCxJ8Onx9kaTIt30j7jUFS00AXQi6QMi99vA= k8s.io/apimachinery v0.0.0-20180913025736-6dd46049f395 h1:X+c9tYTDc9Pmt+Z1YSMqmUTCYf13VYe1u+ZwzjgpK0M= @@ -131,6 +113,4 @@ k8s.io/gengo v0.0.0-20190128074634-0689ccc1d7d6/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8 k8s.io/klog v0.0.0-20181102134211-b9b56d5dfc92/go.mod h1:Gq+BEi5rUBO/HRz0bTSXDUcqjScdoY3a9IHpCEIOOfk= k8s.io/kube-openapi v0.0.0-20190426233423-c5d3b0f4bee0 h1:D//p4U8H78y6as0HmrCe8QucazHZqI6hs9eu+rJflKw= k8s.io/kube-openapi v0.0.0-20190426233423-c5d3b0f4bee0/go.mod h1:iU+ZGYsNlvU9XKUSso6SQfKTCCw7lFduMZy26Mgr2Fw= -mvdan.cc/unparam v0.0.0-20200314162735-0ac8026f7d06 h1:evGBPL1nfLr4BUt+I0IV8q6P3oOqXhmC+hc8aw7xO8A= -mvdan.cc/unparam v0.0.0-20200314162735-0ac8026f7d06/go.mod h1:A9jtdiT4gKMLUlAQjDEVC18O9SOJ8ZAiqWfq9g3rlj8= sigs.k8s.io/structured-merge-diff v0.0.0-20190426204423-ea680f03cc65/go.mod h1:wWxsB5ozmmv/SG7nM11ayaAW51xMvak/t1r0CSlcokI= diff --git a/kubernetes.go b/kubernetes.go index 60e8303..cd8465e 100644 --- a/kubernetes.go +++ b/kubernetes.go @@ -82,19 +82,17 @@ func (k *kubernetesReadiness) prepareTermination(hostnames []string, ids []strin return nil } -func kubeGetClientset() (*kubernetes.Clientset, error) { - envValue := os.Getenv("ROLLER_KUBERNETES") +func kubeGetClientset(kubernetesEnabled bool) (*kubernetes.Clientset, error) { // if it is *explicitly* set to false, then do nothing - if envValue == "false" { + if !kubernetesEnabled { return nil, nil } - // if it is not explicitly set to false, then it depends on if we are in a cluster or not - useKube := envValue == "true" + // creates the in-cluster config config, err := rest.InClusterConfig() if err != nil { if err == rest.ErrNotInCluster { - if !useKube { + if !kubernetesEnabled { return nil, nil } config, err = getKubeOutOfCluster() @@ -136,8 +134,8 @@ func homeDir() string { return os.Getenv("USERPROFILE") // windows } -func kubeGetReadinessHandler(ignoreDaemonSets bool, deleteLocalData bool) (readiness, error) { - clientset, err := kubeGetClientset() +func kubeGetReadinessHandler(kubernetesEnabled, ignoreDaemonSets, deleteLocalData bool) (readiness, error) { + clientset, err := kubeGetClientset(kubernetesEnabled) if err != nil { log.Fatalf("Error getting kubernetes connection: %v", err) } @@ -150,7 +148,7 @@ func kubeGetReadinessHandler(ignoreDaemonSets bool, deleteLocalData bool) (readi // setScaleDownDisabledAnnotation set the "cluster-autoscaler.kubernetes.io/scale-down-disabled" annotation // on the list of nodes if required. Returns a list of 151 where the annotation // is applied. -func setScaleDownDisabledAnnotation(hostnames []string) ([]string, error) { +func setScaleDownDisabledAnnotation(kubernetesEnabled bool, hostnames []string) ([]string, error) { // get the node reference - first need the hostname var ( node *corev1.Node @@ -158,7 +156,7 @@ func setScaleDownDisabledAnnotation(hostnames []string) ([]string, error) { key = clusterAutoscalerScaleDownDisabledFlag annotated = []string{} ) - clientset, err := kubeGetClientset() + clientset, err := kubeGetClientset(kubernetesEnabled) if err != nil { log.Fatalf("Error getting kubernetes connection: %v", err) } @@ -184,14 +182,14 @@ func setScaleDownDisabledAnnotation(hostnames []string) ([]string, error) { } return annotated, nil } -func removeScaleDownDisabledAnnotation(hostnames []string) error { +func removeScaleDownDisabledAnnotation(kubernetesEnabled bool, hostnames []string) error { // get the node reference - first need the hostname var ( node *corev1.Node err error key = clusterAutoscalerScaleDownDisabledFlag ) - clientset, err := kubeGetClientset() + clientset, err := kubeGetClientset(kubernetesEnabled) if err != nil { log.Fatalf("Error getting kubernetes connection: %v", err) } diff --git a/main.go b/main.go index 7bbca3d..95fc2e1 100644 --- a/main.go +++ b/main.go @@ -1,34 +1,19 @@ package main import ( - "fmt" "log" "os" - "strconv" "strings" "time" -) - -const ( - asgCheckDelay = 30 // Default delay between checks of ASG status in seconds -) -var ( - verbose = os.Getenv("ROLLER_VERBOSE") == "true" - canIncreaseMax = os.Getenv("ROLLER_CAN_INCREASE_MAX") == "true" + env "github.com/caarlos0/env/v6" ) func main() { - asgList := strings.Split(os.Getenv("ROLLER_ASG"), ",") - if len(asgList) == 0 { - log.Fatal("Must supply at least one ASG in ROLLER_ASG environment variable") - } + configs := getConfigs() - // get config env - ignoreDaemonSets := os.Getenv("ROLLER_IGNORE_DAEMONSETS") != "false" - deleteLocalData := strings.ToLower(os.Getenv("ROLLER_DELETE_LOCAL_DATA")) == "true" // get a kube connection - readinessHandler, err := kubeGetReadinessHandler(ignoreDaemonSets, deleteLocalData) + readinessHandler, err := kubeGetReadinessHandler(configs.KubernetesEnabled, configs.IgnoreDaemonSets, configs.DeleteLocalData) if err != nil { log.Fatalf("Error getting kubernetes readiness handler when required: %v", err) } @@ -42,33 +27,31 @@ func main() { // to keep track of original target sizes during rolling updates originalDesired := map[string]int64{} - checkDelay, err := getDelay() - if err != nil { - log.Fatalf("Unable to get delay: %s", err.Error()) - } - // infinite loop for { - err := adjust(asgList, ec2Svc, asgSvc, readinessHandler, originalDesired) + err := adjust(configs.KubernetesEnabled, configs.ASGS, ec2Svc, asgSvc, readinessHandler, originalDesired, configs.OriginalDesiredOnTag, configs.IncreaseMax, configs.Verbose) if err != nil { log.Printf("Error adjusting AutoScaling Groups: %v", err) } // delay with each loop - log.Printf("Sleeping %d seconds\n", checkDelay) - time.Sleep(time.Duration(checkDelay) * time.Second) + log.Printf("Sleeping %d seconds\n", configs.Interval) + time.Sleep(configs.Interval) } } -// Returns delay value to use in loop. Uses default if not defined. -func getDelay() (int, error) { - delayOverride, exist := os.LookupEnv("ROLLER_CHECK_DELAY") - if exist { - delay, err := strconv.Atoi(delayOverride) - if err != nil { - return -1, fmt.Errorf("ROLLER_CHECK_DELAY is not parsable: %v (%s)", delayOverride, err.Error()) +func getConfigs() (configs Configs) { + // Compat helper + val, ok := os.LookupEnv("ROLLER_CHECK_DELAY") + if ok { + // Use value from check delay to set an interval + if !strings.HasSuffix(val, "s") { + os.Setenv("ROLLER_INTERVAL", val+"s") } - return delay, nil } - return asgCheckDelay, nil + if err := env.Parse(&configs); err != nil { + log.Panicf("unexpected error while initializing the config: %v", err) + } + + return configs } diff --git a/main_test.go b/main_test.go index afe9248..b4d33b5 100644 --- a/main_test.go +++ b/main_test.go @@ -2,39 +2,63 @@ package main import ( "os" + "reflect" "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func TestGetDelay(t *testing.T) { +func setBaseEnvs() { + os.Clearenv() + + os.Setenv("ROLLER_ASG", "group1") + os.Setenv("ROLLER_INTERVAL", "30s") +} + +func TestGetConfigs(t *testing.T) { tests := []struct { + env string name string - want int + field string + want interface{} envValue string shouldError bool }{ - {"should return default", 30, "", false}, - {"should return override", 17, "17", false}, - {"should error if override invalid", 0, "fake", true}, + // check delay gets translated to interval + {"ROLLER_CHECK_DELAY", "should return default", "Interval", time.Duration(30 * time.Second), "", false}, + {"ROLLER_CHECK_DELAY", "should return override", "Interval", time.Duration(17 * time.Second), "17", false}, + {"ROLLER_CHECK_DELAY", "should fail due to wrong type", "CheckDelay", 0, "17s", true}, + {"ROLLER_CHECK_DELAY", "should error if override invalid", "CheckDelay", 0, "fake", true}, + {"ROLLER_INTERVAL", "should return default", "Interval", time.Duration(30 * time.Second), "", false}, + {"ROLLER_INTERVAL", "should fail due to wrong type", "Interval", 0, "17", true}, + {"ROLLER_INTERVAL", "should return override", "Interval", time.Duration(17 * time.Second), "17s", false}, + {"ROLLER_INTERVAL", "should error if override invalid", "Interval", 0, "fake", true}, + {"ROLLER_ASG", "should error on empty", "ASGS", 0, "", true}, + {"ROLLER_ASG", "should work with single value", "ASGS", []string{"grp1"}, "grp1", false}, + {"ROLLER_ASG", "should work with multiple values", "ASGS", []string{"grp1", "grp2"}, "grp1,grp2", false}, + {"ROLLER_ASG", "should work with multiple values with space after comma", "ASGS", []string{"grp1", " grp2"}, "grp1, grp2", false}, } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - os.Unsetenv("ROLLER_CHECK_DELAY") + t.Run(tt.env+":"+tt.name, func(t *testing.T) { + setBaseEnvs() + os.Unsetenv(tt.env) if tt.envValue != "" { - os.Setenv("ROLLER_CHECK_DELAY", tt.envValue) + os.Setenv(tt.env, tt.envValue) } - got, err := getDelay() - if err != nil { - if !tt.shouldError { - t.Errorf("getDelay() returned error: %s", err.Error()) - } + if tt.shouldError { + require.Panics(t, func() { + getConfigs() + }) } else { - if tt.shouldError { - t.Error("getDelay() should have returned error") - } else if got != tt.want { - t.Errorf("getDelay() = %v, want %v", got, tt.want) - } + got := getConfigs() + // use reflect to access struct dynamically + r := reflect.ValueOf(got) + f := reflect.Indirect(r).FieldByName(tt.field).Interface() + assert.EqualValues(t, tt.want, f) } }) } diff --git a/original_desired.go b/original_desired.go index a89fae4..8d6d854 100644 --- a/original_desired.go +++ b/original_desired.go @@ -3,7 +3,6 @@ package main import ( "fmt" "log" - "os" "strconv" "github.com/aws/aws-sdk-go/aws" @@ -13,18 +12,14 @@ import ( const asgTagNameOriginalDesired = "aws-asg-roller/OriginalDesired" -var ( - storeOriginalDesiredOnTag = os.Getenv("ROLLER_ORIGINAL_DESIRED_ON_TAG") == "true" -) - // Populates the original desired values for each ASG, based on the current 'desired' value if unkonwn. // The original desired value is recorded as a tag on the respective ASG. Subsequent runs attempt to // read the value of the tag to preserve state in the case of the process terminating. -func populateOriginalDesired(originalDesired map[string]int64, asgs []*autoscaling.Group, asgSvc autoscalingiface.AutoScalingAPI) error { +func populateOriginalDesired(originalDesired map[string]int64, asgs []*autoscaling.Group, asgSvc autoscalingiface.AutoScalingAPI, storeOriginalDesiredOnTag bool, verbose bool) error { for _, asg := range asgs { asgName := *asg.AutoScalingGroupName if storeOriginalDesiredOnTag { - tagOriginalDesired, err := getOriginalDesiredTag(asgSvc, asgName) + tagOriginalDesired, err := getOriginalDesiredTag(asgSvc, asgName, verbose) if err != nil { return err } @@ -39,7 +34,7 @@ func populateOriginalDesired(originalDesired map[string]int64, asgs []*autoscali log.Printf("guessed desired value of %d from current desired on ASG: %s", *asg.DesiredCapacity, asgName) } if storeOriginalDesiredOnTag { - err := setOriginalDesiredTag(asgSvc, asgName, asg) + err := setOriginalDesiredTag(asgSvc, asgName, asg, verbose) if err != nil { return err } @@ -52,7 +47,7 @@ func populateOriginalDesired(originalDesired map[string]int64, asgs []*autoscali // returns // the original desired value from the tag, if present, otherwise -1 // error -func getOriginalDesiredTag(asgSvc autoscalingiface.AutoScalingAPI, asgName string) (int64, error) { +func getOriginalDesiredTag(asgSvc autoscalingiface.AutoScalingAPI, asgName string, verbose bool) (int64, error) { tags, err := asgSvc.DescribeTags(&autoscaling.DescribeTagsInput{ Filters: []*autoscaling.Filter{ { @@ -81,7 +76,7 @@ func getOriginalDesiredTag(asgSvc autoscalingiface.AutoScalingAPI, asgName strin } // record original desired value on a tag, in case of process restart -func setOriginalDesiredTag(asgSvc autoscalingiface.AutoScalingAPI, asgName string, asg *autoscaling.Group) error { +func setOriginalDesiredTag(asgSvc autoscalingiface.AutoScalingAPI, asgName string, asg *autoscaling.Group, verbose bool) error { _, err := asgSvc.CreateOrUpdateTags(&autoscaling.CreateOrUpdateTagsInput{ Tags: []*autoscaling.Tag{ { diff --git a/roller.go b/roller.go index 9ff0eb1..b5d79a5 100644 --- a/roller.go +++ b/roller.go @@ -16,7 +16,7 @@ const ( ) // adjust runs a single adjustment in the loop to update an ASG in a rolling fashion to latest launch config -func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.AutoScalingAPI, readinessHandler readiness, originalDesired map[string]int64) error { +func adjust(kubernetesEnabled bool, asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.AutoScalingAPI, readinessHandler readiness, originalDesired map[string]int64, storeOriginalDesiredOnTag, canIncreaseMax, verbose bool) error { // get information on all of the groups asgs, err := awsDescribeGroups(asgSvc, asgList) if err != nil { @@ -24,7 +24,7 @@ func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.Au } // look up and record original desired values - err = populateOriginalDesired(originalDesired, asgs, asgSvc) + err = populateOriginalDesired(originalDesired, asgs, asgSvc, storeOriginalDesiredOnTag, verbose) if err != nil { return fmt.Errorf("unexpected error looking up original desired values for ASGs, skipping: %v", err) } @@ -33,14 +33,14 @@ func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.Au // get information on all of the ec2 instances instances := make([]*autoscaling.Instance, 0) for _, asg := range asgs { - oldInstances, newInstances, err := groupInstances(asg, ec2Svc) + oldInstances, newInstances, err := groupInstances(asg, ec2Svc, verbose) if err != nil { return fmt.Errorf("unable to group instances into new and old: %v", err) } // if there are no outdated instances skip updating if len(oldInstances) == 0 && *asg.DesiredCapacity == originalDesired[*asg.AutoScalingGroupName] { log.Printf("[%s] ok\n", *asg.AutoScalingGroupName) - err := ensureNoScaleDownDisabledAnnotation(ec2Svc, mapInstancesIds(asg.Instances)) + err := ensureNoScaleDownDisabledAnnotation(kubernetesEnabled, ec2Svc, mapInstancesIds(asg.Instances)) if err != nil { log.Printf("[%s] Unable to update node annotations: %v\n", *asg.AutoScalingGroupName, err) } @@ -71,7 +71,7 @@ func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.Au // keep keyed references to the ASGs for _, asg := range asgMap { - newDesiredA, terminateID, err := calculateAdjustment(asg, ec2Svc, hostnameMap, readinessHandler, originalDesired[*asg.AutoScalingGroupName]) + newDesiredA, terminateID, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, readinessHandler, originalDesired[*asg.AutoScalingGroupName], verbose) log.Printf("[%v] desired: %d original: %d", p2v(asg.AutoScalingGroupName), newDesiredA, originalDesired[*asg.AutoScalingGroupName]) if err != nil { log.Printf("[%v] error calculating adjustment - skipping: %v\n", p2v(asg.AutoScalingGroupName), err) @@ -88,7 +88,7 @@ func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.Au // adjust current desired for asg, desired := range newDesired { log.Printf("[%s] set desired instances: %d\n", asg, desired) - err = setAsgDesired(asgSvc, asgMap[asg], desired) + err = setAsgDesired(asgSvc, asgMap[asg], desired, canIncreaseMax, verbose) if err != nil { return fmt.Errorf("[%s] error setting desired to %d: %v", asg, desired, err) } @@ -107,12 +107,12 @@ func adjust(asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.Au // ensureNoScaleDownDisabledAnnotation remove any "cluster-autoscaler.kubernetes.io/scale-down-disabled" // annotations in the nodes as no update is required anymore. -func ensureNoScaleDownDisabledAnnotation(ec2Svc ec2iface.EC2API, ids []string) error { +func ensureNoScaleDownDisabledAnnotation(kubernetesEnabled bool, ec2Svc ec2iface.EC2API, ids []string) error { hostnames, err := awsGetHostnames(ec2Svc, ids) if err != nil { return fmt.Errorf("unable to get aws hostnames for ids %v: %v", ids, err) } - return removeScaleDownDisabledAnnotation(hostnames) + return removeScaleDownDisabledAnnotation(kubernetesEnabled, hostnames) } // calculateAdjustment calculates the new settings for the desired number, and which node (if any) to terminate @@ -121,11 +121,11 @@ func ensureNoScaleDownDisabledAnnotation(ec2Svc ec2iface.EC2API, ids []string) e // what the new desired number of instances should be // ID of an instance to terminate, "" if none // error -func calculateAdjustment(asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnameMap map[string]string, readinessHandler readiness, originalDesired int64) (int64, string, error) { +func calculateAdjustment(kubernetesEnabled bool, asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnameMap map[string]string, readinessHandler readiness, originalDesired int64, verbose bool) (int64, string, error) { desired := *asg.DesiredCapacity // get instances with old launch config - oldInstances, newInstances, err := groupInstances(asg, ec2Svc) + oldInstances, newInstances, err := groupInstances(asg, ec2Svc, verbose) if err != nil { return originalDesired, "", fmt.Errorf("unable to group instances into new and old: %v", err) } @@ -187,7 +187,7 @@ func calculateAdjustment(asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnam for _, i := range ids { hostnames = append(hostnames, hostnameMap[i]) } - _, err = setScaleDownDisabledAnnotation(hostnames) + _, err = setScaleDownDisabledAnnotation(kubernetesEnabled, hostnames) if err != nil { log.Printf("Unable to set disabled scale down annotations: %v", err) } @@ -222,7 +222,7 @@ func calculateAdjustment(asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnam // groupInstances handles all of the logic for determining which nodes in the ASG have an old or outdated // config, and which are up to date. It should do nothing else. // The entire rest of the code should rely on this for making the determination -func groupInstances(asg *autoscaling.Group, ec2Svc ec2iface.EC2API) ([]*autoscaling.Instance, []*autoscaling.Instance, error) { +func groupInstances(asg *autoscaling.Group, ec2Svc ec2iface.EC2API, verbose bool) ([]*autoscaling.Instance, []*autoscaling.Instance, error) { oldInstances := make([]*autoscaling.Instance, 0) newInstances := make([]*autoscaling.Instance, 0) // we want to be able to handle LaunchTemplate as well diff --git a/roller_internal_test.go b/roller_internal_test.go index 28e094b..9b68418 100644 --- a/roller_internal_test.go +++ b/roller_internal_test.go @@ -11,6 +11,9 @@ import ( "github.com/aws/aws-sdk-go/service/ec2" ) +// Tests do not talk to a live kubernetes cluster +const kubernetesEnabled = false + type testReadyHandler struct { unreadyCount int unreadyError error @@ -67,30 +70,31 @@ func TestCalculateAdjustment(t *testing.T) { targetDesired int64 targetTerminate string err error + verbose bool }{ // 1 old, 2 new healthy, 0 new unhealthy, should terminate old - {[]string{"1"}, []string{"2", "3"}, []string{}, 3, 2, nil, 3, "1", nil}, + {[]string{"1"}, []string{"2", "3"}, []string{}, 3, 2, nil, 3, "1", nil, false}, // 0 old, 2 new healthy, 0 new unhealthy, should indicate end of process - {[]string{}, []string{"2", "3"}, []string{}, 2, 2, nil, 2, "", nil}, + {[]string{}, []string{"2", "3"}, []string{}, 2, 2, nil, 2, "", nil, false}, // 2 old, 0 new healthy, 0 new unhealthy, should indicate start of process - {[]string{"1", "2"}, []string{}, []string{}, 2, 2, nil, 3, "", nil}, + {[]string{"1", "2"}, []string{}, []string{}, 2, 2, nil, 3, "", nil, false}, // 2 old, 0 new healthy, 0 new unhealthy, started, should not do anything until new healthy one - {[]string{"1", "2"}, []string{}, []string{}, 3, 2, nil, 3, "", nil}, + {[]string{"1", "2"}, []string{}, []string{}, 3, 2, nil, 3, "", nil, false}, // 2 old, 1 new healthy, 0 new unhealthy, remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, nil, 3, "1", nil}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, nil, 3, "1", nil, false}, // 2 old, 0 new healthy, 1 new unhealthy, started, should not do anything until new one is healthy - {[]string{"1", "2"}, []string{}, []string{"3"}, 3, 2, nil, 3, "", nil}, + {[]string{"1", "2"}, []string{}, []string{"3"}, 3, 2, nil, 3, "", nil, false}, // 2 old, 1 new healthy, 0 new unhealthy, 1 new unready, should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyCountHandler, 3, "", nil}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyCountHandler, 3, "", nil, false}, // 2 old, 1 new healthy, 0 new unhealthy, 0 new unready, 1 error: should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyErrorHandler, 3, "", fmt.Errorf("error")}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyErrorHandler, 3, "", fmt.Errorf("error"), false}, // 2 old, 1 new healthy, 0 new unhealthy, 0 unready, remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, readyHandler, 3, "1", nil}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, readyHandler, 3, "1", nil, false}, // 2 old, 1 new healthy, 0 new unhealthy, 0 new unready, 1 error: should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateErrorHandler, 3, "", fmt.Errorf("unexpected error")}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateErrorHandler, 3, "", fmt.Errorf("unexpected error"), false}, // 2 old, 1 new healthy, 0 new unhealthy, 0 unready, successful terminate: remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateHandler, 3, "1", nil}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateHandler, 3, "1", nil, false}, } hostnameMap := map[string]string{} for i := 0; i < 20; i++ { @@ -138,7 +142,7 @@ func TestCalculateAdjustment(t *testing.T) { ec2Svc := &mockEc2Svc{ autodescribe: true, } - desired, terminate, err := calculateAdjustment(asg, ec2Svc, hostnameMap, tt.readiness, tt.originalDesired) + desired, terminate, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, tt.readiness, tt.originalDesired, tt.verbose) switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): t.Errorf("%d: mismatched errors, actual then expected", i) @@ -167,6 +171,7 @@ func TestAdjust(t *testing.T) { terminate []string canIncreaseMax bool persistOriginalDesiredOnTag bool + verbose bool }{ { "2 asgs adjust first run", @@ -188,6 +193,7 @@ func TestAdjust(t *testing.T) { []string{}, false, false, + false, }, { "2 asgs adjust in progress", @@ -209,6 +215,7 @@ func TestAdjust(t *testing.T) { []string{}, false, false, + false, }, { "2 asgs adjust in progress with ROLLER_ORIGINAL_DESIRED_ON_TAG set to true", @@ -230,6 +237,7 @@ func TestAdjust(t *testing.T) { []string{"1"}, false, true, + false, }, { "2 asgs adjust complete", @@ -251,6 +259,7 @@ func TestAdjust(t *testing.T) { []string{}, false, false, + false, }, { "2 asgs adjust increase max fail", @@ -272,6 +281,7 @@ func TestAdjust(t *testing.T) { []string{}, false, false, + false, }, { "2 asgs adjust increase max succeed", @@ -293,6 +303,7 @@ func TestAdjust(t *testing.T) { []string{}, true, false, + false, }, } @@ -331,7 +342,7 @@ func TestAdjust(t *testing.T) { LaunchConfigurationName: &lcName, MaxSize: &max, } - storeOriginalDesiredOnTag = tt.persistOriginalDesiredOnTag + if tt.persistOriginalDesiredOnTag { if originalDesired, ok := tt.originalDesired[name]; ok { validGroup.Tags = []*autoscaling.TagDescription{ @@ -364,8 +375,7 @@ func TestAdjust(t *testing.T) { ks := k newDesiredPtr[&ks] = v } - canIncreaseMax = tt.canIncreaseMax - err := adjust(tt.asgs, ec2Svc, asgSvc, tt.handler, tt.originalDesired) + err := adjust(kubernetesEnabled, tt.asgs, ec2Svc, asgSvc, tt.handler, tt.originalDesired, tt.persistOriginalDesiredOnTag, tt.canIncreaseMax, tt.verbose) // what were our last calls to each? switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): @@ -419,7 +429,7 @@ func TestGroupInstances(t *testing.T) { ec2Svc := &mockEc2Svc{ autodescribe: true, } - oldInstances, newInstances, err := groupInstances(asg, ec2Svc) + oldInstances, newInstances, err := groupInstances(asg, ec2Svc, false) if err != nil { t.Errorf("unexpected error grouping instances: %v", err) return From d0d021ffd5bc5ae79301d8e59456ba34e0f7b207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helgi=20=C3=9Eormar=20=C3=9Eorbj=C3=B6rnsson?= Date: Thu, 6 May 2021 16:04:38 -0700 Subject: [PATCH 2/5] Add the ability to control if the roller drains and if it uses force Introduces ROLLER_DRAIN and ROLLER_DRAIN_FORCE, both defaulting to true to keep existing behaviour for compatibility Fixes #52 --- README.md | 2 ++ configs.go | 4 +++- kubernetes.go | 14 +++++++++---- main.go | 6 +++++- readiness.go | 2 +- roller.go | 8 ++++---- roller_internal_test.go | 44 ++++++++++++++++++++++++++++------------- 7 files changed, 55 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 3760104..1dfb09b 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,8 @@ ASG Roller takes its configuration via environment variables. All environment va * `ROLLER_ASG` [`string`, required]: comma-separated list of auto-scaling groups that should be managed. * `ROLLER_KUBERNETES` [`bool`, default: `true`]: If set to `true`, will check if a new node is ready via-a-vis Kubernetes before declaring it "ready", and will drain an old node before eliminating it. Defaults to `true` when running in Kubernetes as a pod, `false` otherwise. +* `ROLLER_DRAIN` [`bool`, default: `true`]: If set to `true`, will handle draining of pods and other kubernetes resources. Consider setting to false if your distribution has a built in drain on terminate. +* `ROLLER_DRAIN_FORCE` [`bool` default: `true`]: If drain will force delete kubernetes resources if they violate PDB or grace periods. * `ROLLER_IGNORE_DAEMONSETS` [`bool`, default: `true`]: If set to `false`, will not reclaim a node until there are no DaemonSets running on the node; if set to `true` (default), will reclaim node when all regular pods are drained off, but will ignore the presence of DaemonSets, which should be present on every node anyways. Normally, you want this set to `true`. * `ROLLER_DELETE_LOCAL_DATA` [`bool`, default: `false`]: If set to `false` (default), will not reclaim a node until there are no pods with [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) running on the node; if set to `true`, will continue to terminate the pod and delete the local data before reclaiming the node. The default is `false` to maintain backward compatibility. * `ROLLER_INTERVAL` [`time.Duration`, default: `30s`]: Time between roller runs. Takes time duration such as 10s, 10m, 10d diff --git a/configs.go b/configs.go index 2401489..8d7ce11 100644 --- a/configs.go +++ b/configs.go @@ -6,8 +6,10 @@ import "time" type Configs struct { Interval time.Duration `env:"ROLLER_INTERVAL" envDefault:"30s"` CheckDelay int `env:"ROLLER_CHECK_DELAY" envDefault:"30"` + Drain bool `env:"ROLLER_DRAIN" envDefault:"true"` + DrainForce bool `env:"ROLLER_DRAIN_FORCE" envDefault:"true"` IncreaseMax bool `env:"ROLLER_CAN_INCREASE_MAX" envDefault:"false"` - IgnoreDaemonSets bool `env:"ROLLER_IGNORE_DAEMONSETS" envDefault:"false"` + IgnoreDaemonSets bool `env:"ROLLER_IGNORE_DAEMONSETS" envDefault:"true"` DeleteLocalData bool `env:"ROLLER_DELETE_LOCAL_DATA" envDefault:"false"` OriginalDesiredOnTag bool `env:"ROLLER_ORIGINAL_DESIRED_ON_TAG" envDefault:"false"` ASGS []string `env:"ROLLER_ASG,required" envSeparator:","` diff --git a/kubernetes.go b/kubernetes.go index cd8465e..fc7b458 100644 --- a/kubernetes.go +++ b/kubernetes.go @@ -6,7 +6,7 @@ import ( "os" "path/filepath" - drain "github.com/openshift/kubernetes-drain" + drainer "github.com/openshift/kubernetes-drain" corev1 "k8s.io/api/core/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -57,22 +57,28 @@ func (k *kubernetesReadiness) getUnreadyCount(hostnames []string, ids []string) } return unReadyCount, nil } -func (k *kubernetesReadiness) prepareTermination(hostnames []string, ids []string) error { +func (k *kubernetesReadiness) prepareTermination(hostnames []string, ids []string, drain, drainForce bool) error { // get the node reference - first need the hostname var ( node *corev1.Node err error ) + + // Skip drain + if !drain { + return nil + } + for _, h := range hostnames { node, err = k.clientset.CoreV1().Nodes().Get(h, v1.GetOptions{}) if err != nil { return fmt.Errorf("Unexpected error getting kubernetes node %s: %v", h, err) } // set options and drain nodes - err = drain.Drain(k.clientset, []*corev1.Node{node}, &drain.DrainOptions{ + err = drainer.Drain(k.clientset, []*corev1.Node{node}, &drainer.DrainOptions{ IgnoreDaemonsets: k.ignoreDaemonSets, GracePeriodSeconds: -1, - Force: true, + Force: drainForce, DeleteLocalData: k.deleteLocalData, }) if err != nil { diff --git a/main.go b/main.go index 95fc2e1..1047b6c 100644 --- a/main.go +++ b/main.go @@ -29,7 +29,11 @@ func main() { // infinite loop for { - err := adjust(configs.KubernetesEnabled, configs.ASGS, ec2Svc, asgSvc, readinessHandler, originalDesired, configs.OriginalDesiredOnTag, configs.IncreaseMax, configs.Verbose) + err := adjust( + configs.KubernetesEnabled, configs.ASGS, ec2Svc, asgSvc, + readinessHandler, originalDesired, configs.OriginalDesiredOnTag, + configs.IncreaseMax, configs.Verbose, configs.Drain, configs.DrainForce, + ) if err != nil { log.Printf("Error adjusting AutoScaling Groups: %v", err) } diff --git a/readiness.go b/readiness.go index 4008465..de5eb48 100644 --- a/readiness.go +++ b/readiness.go @@ -2,5 +2,5 @@ package main type readiness interface { getUnreadyCount(hostnames []string, ids []string) (int, error) - prepareTermination(hostnames []string, ids []string) error + prepareTermination(hostnames []string, ids []string, drain, drainForce bool) error } diff --git a/roller.go b/roller.go index b5d79a5..986ff63 100644 --- a/roller.go +++ b/roller.go @@ -16,7 +16,7 @@ const ( ) // adjust runs a single adjustment in the loop to update an ASG in a rolling fashion to latest launch config -func adjust(kubernetesEnabled bool, asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.AutoScalingAPI, readinessHandler readiness, originalDesired map[string]int64, storeOriginalDesiredOnTag, canIncreaseMax, verbose bool) error { +func adjust(kubernetesEnabled bool, asgList []string, ec2Svc ec2iface.EC2API, asgSvc autoscalingiface.AutoScalingAPI, readinessHandler readiness, originalDesired map[string]int64, storeOriginalDesiredOnTag, canIncreaseMax, verbose, drain, drainForce bool) error { // get information on all of the groups asgs, err := awsDescribeGroups(asgSvc, asgList) if err != nil { @@ -71,7 +71,7 @@ func adjust(kubernetesEnabled bool, asgList []string, ec2Svc ec2iface.EC2API, as // keep keyed references to the ASGs for _, asg := range asgMap { - newDesiredA, terminateID, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, readinessHandler, originalDesired[*asg.AutoScalingGroupName], verbose) + newDesiredA, terminateID, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, readinessHandler, originalDesired[*asg.AutoScalingGroupName], verbose, drain, drainForce) log.Printf("[%v] desired: %d original: %d", p2v(asg.AutoScalingGroupName), newDesiredA, originalDesired[*asg.AutoScalingGroupName]) if err != nil { log.Printf("[%v] error calculating adjustment - skipping: %v\n", p2v(asg.AutoScalingGroupName), err) @@ -121,7 +121,7 @@ func ensureNoScaleDownDisabledAnnotation(kubernetesEnabled bool, ec2Svc ec2iface // what the new desired number of instances should be // ID of an instance to terminate, "" if none // error -func calculateAdjustment(kubernetesEnabled bool, asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnameMap map[string]string, readinessHandler readiness, originalDesired int64, verbose bool) (int64, string, error) { +func calculateAdjustment(kubernetesEnabled bool, asg *autoscaling.Group, ec2Svc ec2iface.EC2API, hostnameMap map[string]string, readinessHandler readiness, originalDesired int64, verbose, drain, drainForce bool) (int64, string, error) { desired := *asg.DesiredCapacity // get instances with old launch config @@ -209,7 +209,7 @@ func calculateAdjustment(kubernetesEnabled bool, asg *autoscaling.Group, ec2Svc err error ) hostname = hostnameMap[candidate] - err = readinessHandler.prepareTermination([]string{hostname}, []string{candidate}) + err = readinessHandler.prepareTermination([]string{hostname}, []string{candidate}, drain, drainForce) if err != nil { return desired, "", fmt.Errorf("unexpected error readiness handler terminating node %s: %v", hostname, err) } diff --git a/roller_internal_test.go b/roller_internal_test.go index 9b68418..c614c42 100644 --- a/roller_internal_test.go +++ b/roller_internal_test.go @@ -23,7 +23,7 @@ type testReadyHandler struct { func (t *testReadyHandler) getUnreadyCount(hostnames []string, ids []string) (int, error) { return t.unreadyCount, t.unreadyError } -func (t *testReadyHandler) prepareTermination(hostnames []string, ids []string) error { +func (t *testReadyHandler) prepareTermination(hostnames []string, ids []string, drain, drainForce bool) error { return t.terminateError } @@ -71,30 +71,32 @@ func TestCalculateAdjustment(t *testing.T) { targetTerminate string err error verbose bool + drain bool + drainForce bool }{ // 1 old, 2 new healthy, 0 new unhealthy, should terminate old - {[]string{"1"}, []string{"2", "3"}, []string{}, 3, 2, nil, 3, "1", nil, false}, + {[]string{"1"}, []string{"2", "3"}, []string{}, 3, 2, nil, 3, "1", nil, false, true, true}, // 0 old, 2 new healthy, 0 new unhealthy, should indicate end of process - {[]string{}, []string{"2", "3"}, []string{}, 2, 2, nil, 2, "", nil, false}, + {[]string{}, []string{"2", "3"}, []string{}, 2, 2, nil, 2, "", nil, false, true, true}, // 2 old, 0 new healthy, 0 new unhealthy, should indicate start of process - {[]string{"1", "2"}, []string{}, []string{}, 2, 2, nil, 3, "", nil, false}, + {[]string{"1", "2"}, []string{}, []string{}, 2, 2, nil, 3, "", nil, false, true, true}, // 2 old, 0 new healthy, 0 new unhealthy, started, should not do anything until new healthy one - {[]string{"1", "2"}, []string{}, []string{}, 3, 2, nil, 3, "", nil, false}, + {[]string{"1", "2"}, []string{}, []string{}, 3, 2, nil, 3, "", nil, false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, nil, 3, "1", nil, false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, nil, 3, "1", nil, false, true, true}, // 2 old, 0 new healthy, 1 new unhealthy, started, should not do anything until new one is healthy - {[]string{"1", "2"}, []string{}, []string{"3"}, 3, 2, nil, 3, "", nil, false}, + {[]string{"1", "2"}, []string{}, []string{"3"}, 3, 2, nil, 3, "", nil, false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, 1 new unready, should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyCountHandler, 3, "", nil, false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyCountHandler, 3, "", nil, false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, 0 new unready, 1 error: should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyErrorHandler, 3, "", fmt.Errorf("error"), false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, unreadyErrorHandler, 3, "", fmt.Errorf("error"), false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, 0 unready, remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, readyHandler, 3, "1", nil, false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, readyHandler, 3, "1", nil, false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, 0 new unready, 1 error: should not change anything - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateErrorHandler, 3, "", fmt.Errorf("unexpected error"), false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateErrorHandler, 3, "", fmt.Errorf("unexpected error"), false, true, true}, // 2 old, 1 new healthy, 0 new unhealthy, 0 unready, successful terminate: remove an old one - {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateHandler, 3, "1", nil, false}, + {[]string{"1", "2"}, []string{"3"}, []string{}, 3, 2, terminateHandler, 3, "1", nil, false, true, true}, } hostnameMap := map[string]string{} for i := 0; i < 20; i++ { @@ -142,7 +144,7 @@ func TestCalculateAdjustment(t *testing.T) { ec2Svc := &mockEc2Svc{ autodescribe: true, } - desired, terminate, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, tt.readiness, tt.originalDesired, tt.verbose) + desired, terminate, err := calculateAdjustment(kubernetesEnabled, asg, ec2Svc, hostnameMap, tt.readiness, tt.originalDesired, tt.verbose, tt.drain, tt.drainForce) switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): t.Errorf("%d: mismatched errors, actual then expected", i) @@ -172,6 +174,8 @@ func TestAdjust(t *testing.T) { canIncreaseMax bool persistOriginalDesiredOnTag bool verbose bool + drain bool + drainForce bool }{ { "2 asgs adjust first run", @@ -194,6 +198,8 @@ func TestAdjust(t *testing.T) { false, false, false, + true, + true, }, { "2 asgs adjust in progress", @@ -216,6 +222,8 @@ func TestAdjust(t *testing.T) { false, false, false, + true, + true, }, { "2 asgs adjust in progress with ROLLER_ORIGINAL_DESIRED_ON_TAG set to true", @@ -238,6 +246,8 @@ func TestAdjust(t *testing.T) { false, true, false, + true, + true, }, { "2 asgs adjust complete", @@ -260,6 +270,8 @@ func TestAdjust(t *testing.T) { false, false, false, + true, + true, }, { "2 asgs adjust increase max fail", @@ -282,6 +294,8 @@ func TestAdjust(t *testing.T) { false, false, false, + true, + true, }, { "2 asgs adjust increase max succeed", @@ -304,6 +318,8 @@ func TestAdjust(t *testing.T) { true, false, false, + true, + true, }, } @@ -375,7 +391,7 @@ func TestAdjust(t *testing.T) { ks := k newDesiredPtr[&ks] = v } - err := adjust(kubernetesEnabled, tt.asgs, ec2Svc, asgSvc, tt.handler, tt.originalDesired, tt.persistOriginalDesiredOnTag, tt.canIncreaseMax, tt.verbose) + err := adjust(kubernetesEnabled, tt.asgs, ec2Svc, asgSvc, tt.handler, tt.originalDesired, tt.persistOriginalDesiredOnTag, tt.canIncreaseMax, tt.verbose, tt.drain, tt.drainForce) // what were our last calls to each? switch { case (err == nil && tt.err != nil) || (err != nil && tt.err == nil) || (err != nil && tt.err != nil && !strings.HasPrefix(err.Error(), tt.err.Error())): From 6c65942abb7281ef921e3614dc3f3d7db10410e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helgi=20=C3=9Eormar=20=C3=9Eorbj=C3=B6rnsson?= Date: Wed, 12 May 2021 08:28:52 -0700 Subject: [PATCH 3/5] Add more info about time.Duration and clearer deprecation notice --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1dfb09b..9de68b2 100644 --- a/README.md +++ b/README.md @@ -232,8 +232,8 @@ ASG Roller takes its configuration via environment variables. All environment va * `ROLLER_DRAIN_FORCE` [`bool` default: `true`]: If drain will force delete kubernetes resources if they violate PDB or grace periods. * `ROLLER_IGNORE_DAEMONSETS` [`bool`, default: `true`]: If set to `false`, will not reclaim a node until there are no DaemonSets running on the node; if set to `true` (default), will reclaim node when all regular pods are drained off, but will ignore the presence of DaemonSets, which should be present on every node anyways. Normally, you want this set to `true`. * `ROLLER_DELETE_LOCAL_DATA` [`bool`, default: `false`]: If set to `false` (default), will not reclaim a node until there are no pods with [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) running on the node; if set to `true`, will continue to terminate the pod and delete the local data before reclaiming the node. The default is `false` to maintain backward compatibility. -* `ROLLER_INTERVAL` [`time.Duration`, default: `30s`]: Time between roller runs. Takes time duration such as 10s, 10m, 10d -* `ROLLER_CHECK_DELAY` [`int`]: Time, in seconds, between checks of ASG status. deprecated, use `ROLLER_INTERVAL`. +* `ROLLER_INTERVAL` [`time.Duration`, default: `30s`]: Time between roller runs. Decimal number with a unit suffix, such as "10s", "10m", "10d", "300ms", "-1.5h" or "2h45m". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Internally uses [time.ParseDuration](https://golang.org/pkg/time/#ParseDuration) +* `ROLLER_CHECK_DELAY` [`int`]: Time, in seconds, between checks of ASG status. **Deprecated**, use `ROLLER_INTERVAL`. If both `ROLLER_CHECK_DELAY` and `ROLLER_INTERVAL` are specified then `ROLLER_INTERVAL` is used. * `ROLLER_CAN_INCREASE_MAX` `bool`: If set to `true`, will increase the ASG maximum size to accommodate the increase in desired count. If set to `false`, will instead error when desired is higher than max. * `ROLLER_ORIGINAL_DESIRED_ON_TAG` [`bool`, default: `false`]: If set to `true`, will store the original desired value of the ASG as a tag on the ASG, with the key `aws-asg-roller/OriginalDesired`. This helps maintain state in the situation where the process terminates. * `ROLLER_VERBOSE` [`bool`, default: `false`]: If set to `true`, will increase verbosity of logs. From aeec8970a8c1b8e20a02f981f8689b6f41637a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helgi=20=C3=9Eormar=20=C3=9Eorbj=C3=B6rnsson?= Date: Wed, 12 May 2021 08:34:09 -0700 Subject: [PATCH 4/5] Show for how long the interval is sleeping --- main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.go b/main.go index 1047b6c..7571038 100644 --- a/main.go +++ b/main.go @@ -38,7 +38,7 @@ func main() { log.Printf("Error adjusting AutoScaling Groups: %v", err) } // delay with each loop - log.Printf("Sleeping %d seconds\n", configs.Interval) + log.Printf("Sleeping %v\n", configs.Interval) time.Sleep(configs.Interval) } } From e6e6dd6793b1def410dda808f20d9555f2a6ddaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Helgi=20=C3=9Eormar=20=C3=9Eorbj=C3=B6rnsson?= Date: Sun, 16 May 2021 13:22:35 -0700 Subject: [PATCH 5/5] Bump env lib to 6.6.0 https://github.com/caarlos0/env/releases/tag/v6.6.0 --- go.mod | 2 +- go.sum | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 81f8c6b..a1751f0 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.12 require ( github.com/aws/aws-sdk-go v1.21.8 - github.com/caarlos0/env/v6 v6.5.0 + github.com/caarlos0/env/v6 v6.6.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-log/log v0.2.0 // indirect github.com/gogo/protobuf v0.0.0-20170330071051-c0656edd0d9e // indirect diff --git a/go.sum b/go.sum index b70422a..29895ac 100644 --- a/go.sum +++ b/go.sum @@ -3,8 +3,8 @@ github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/aws/aws-sdk-go v1.21.8 h1:Lv6hW2twBhC6mGZAuWtqplEpIIqtVctJg02sE7Qn0Zw= github.com/aws/aws-sdk-go v1.21.8/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/caarlos0/env/v6 v6.5.0 h1:f4C7ZQwm0nRFo8vETCQviLUOtOlOwsOhgc/QXp0zrTM= -github.com/caarlos0/env/v6 v6.5.0/go.mod h1:5ZqhjfyF261xGkANuSuMQ1FeA9ikA3wzDY64wSd9k8k= +github.com/caarlos0/env/v6 v6.6.0 h1:kVhajCpqX5pSfH41gFd8cPXPZahqJrnn9HxJ1vKftW4= +github.com/caarlos0/env/v6 v6.6.0/go.mod h1:P0BVSgU9zfkxfSpFUs6KsO3uWR4k3Ac0P66ibAGTybM= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= @@ -46,6 +46,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/matryer/is v1.4.0 h1:sosSmIWwkYITGrxZ25ULNDeKiMNzFSr4V/eqBQP0PeE= +github.com/matryer/is v1.4.0/go.mod h1:8I/i5uYgLzgsgEloJE1U6xx5HkBQpAZvepWuujKwMRU= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180320133207-05fbef0ca5da/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=