From b2092bfe766a6f38ef85874b50f81f08028c42fe Mon Sep 17 00:00:00 2001 From: Pier-Hugues Pellerin Date: Tue, 14 Jul 2020 17:29:52 -0400 Subject: [PATCH] [Elastic Agent] Handle 429 response from the server and adjust backoff (#19918) * [Elastic Agent] Handle 429 response from the server and adjust backoff When enrolling, if the server is currently handling too many concurrent requests it will return a 429 status code. The enroll subcommand will retry to enroll with an exponential backoff. (Init 60sec and max 10mins) This also adjusts the backoff logic in the ACK. Requires: https://github.com/elastic/kibana/pull/71552 * changelog * Change values (cherry picked from commit 2db21521a4cfe1459200d84ea24723c0cdad5128) --- x-pack/elastic-agent/CHANGELOG.asciidoc | 2 ++ .../pkg/agent/application/fleet_gateway.go | 4 ++-- x-pack/elastic-agent/pkg/agent/cmd/enroll.go | 15 +++++++++++++++ x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go | 7 +++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/x-pack/elastic-agent/CHANGELOG.asciidoc b/x-pack/elastic-agent/CHANGELOG.asciidoc index 811f9cd6a859..995fb5987940 100644 --- a/x-pack/elastic-agent/CHANGELOG.asciidoc +++ b/x-pack/elastic-agent/CHANGELOG.asciidoc @@ -84,3 +84,5 @@ - Agent now load balances across multiple Kibana instances {pull}19628[19628] - Configuration cleanup {pull}19848[19848] - Agent now sends its own logs to elasticsearch {pull}19811[19811] +- Add --insecure option to enroll command {pull}19900[19900] +- Will retry to enroll if the server returns a 429. 
{pull}19918[19918] diff --git a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go index 2856cd83abf7..cd94380e6739 100644 --- a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go +++ b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway.go @@ -23,8 +23,8 @@ var defaultGatewaySettings = &fleetGatewaySettings{ Duration: 1 * time.Second, // time between successful calls Jitter: 500 * time.Millisecond, // used as a jitter for duration Backoff: backoffSettings{ // time after a failed call - Init: 5 * time.Second, - Max: 60 * time.Second, + Init: 60 * time.Second, + Max: 10 * time.Minute, }, } diff --git a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go index bfa1e73cb6ad..e15ef7222f74 100644 --- a/x-pack/elastic-agent/pkg/agent/cmd/enroll.go +++ b/x-pack/elastic-agent/pkg/agent/cmd/enroll.go @@ -12,6 +12,7 @@ import ( "github.com/spf13/cobra" + "github.com/elastic/beats/v7/libbeat/common/backoff" c "github.com/elastic/beats/v7/libbeat/common/cli" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/application" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/configuration" @@ -20,6 +21,7 @@ import ( "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/cli" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/config" "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/logger" + "github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/fleetapi" ) var defaultDelay = 1 * time.Second @@ -112,6 +114,19 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command, flags *globalFlags, args } err = c.Execute() + signal := make(chan struct{}) + + backExp := backoff.NewExpBackoff(signal, 60*time.Second, 10*time.Minute) + + for err == fleetapi.ErrTooManyRequests { + fmt.Fprintln(streams.Out, "Too many requests on the remote server, will retry in a moment.") + backExp.Wait() + fmt.Fprintln(streams.Out, "Retrying to enroll...") + err 
= c.Execute() + } + + close(signal) + if err != nil { return errors.New(err, "fail to enroll") } diff --git a/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go b/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go index 0d2784ef7419..55955f3edd56 100644 --- a/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go +++ b/x-pack/elastic-agent/pkg/fleetapi/enroll_cmd.go @@ -21,6 +21,9 @@ import ( // EnrollType is the type of enrollment to do with the elastic-agent. type EnrollType string +// ErrTooManyRequests is received when the remote server is overloaded. +var ErrTooManyRequests = errors.New("too many requests received (429)") + const ( // PermanentEnroll is default enrollment type, by default an Agent is permanently enroll to Agent. PermanentEnroll = EnrollType("PERMANENT") @@ -190,6 +193,10 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo } defer resp.Body.Close() + if resp.StatusCode == http.StatusTooManyRequests { + return nil, ErrTooManyRequests + } + if resp.StatusCode != http.StatusOK { return nil, extract(resp.Body) }