Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Agent to be more resilient in case of unauthenticated timeouts with IMDS #3795

Merged
merged 1 commit into from
Jul 13, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion agent/api/ecsclient/client.go
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
package ecsclient

import (
"context"
"errors"
"fmt"
"runtime"
@@ -24,12 +25,14 @@ import (
apicontainerstatus "github.com/aws/amazon-ecs-agent/agent/api/container/status"
"github.com/aws/amazon-ecs-agent/agent/async"
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/credentials/instancecreds"
"github.com/aws/amazon-ecs-agent/agent/ec2"
"github.com/aws/amazon-ecs-agent/agent/httpclient"
"github.com/aws/amazon-ecs-agent/agent/utils"
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
"github.com/aws/amazon-ecs-agent/ecs-agent/ecs_client/model/ecs"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
@@ -53,6 +56,12 @@ const (
// ecsMaxNetworkBindingsLength is the maximum length of the ecs.NetworkBindings list sent as part of the
// container state change payload. Currently, this is enforced only when containerPortRanges are requested.
ecsMaxNetworkBindingsLength = 100
// Following constants used for SetInstanceIdentity retry with exponential backoff
setInstanceIdRetryTimeOut = 30 * time.Second
setInstanceIdRetryBackoffMin = 100 * time.Millisecond
setInstanceIdRetryBackoffMax = 5 * time.Second
setInstanceIdRetryBackoffJitter = 0.2
setInstanceIdRetryBackoffMultiple = 2
)

// APIECSClient implements ECSClient
@@ -226,7 +235,24 @@ func (client *APIECSClient) setInstanceIdentity(registerRequest ecs.RegisterCont
}

iidRetrieved := true
instanceIdentityDoc, err := client.ec2metadata.GetDynamicData(ec2.InstanceIdentityDocumentResource)
backoff := retry.NewExponentialBackoff(setInstanceIdRetryBackoffMin, setInstanceIdRetryBackoffMax,
setInstanceIdRetryBackoffJitter, setInstanceIdRetryBackoffMultiple)
ctx, cancel := context.WithTimeout(context.Background(), setInstanceIdRetryTimeOut)
defer cancel()
err := retry.RetryWithBackoffCtx(ctx, backoff, func() error {
var attemptErr error
seelog.Debugf("Attempting to get Instance Identity Document")
instanceIdentityDoc, attemptErr = client.ec2metadata.GetDynamicData(ec2.InstanceIdentityDocumentResource)
if attemptErr != nil {
seelog.Debugf("Unable to get instance identity document, retrying: %v", attemptErr)
// force credentials to expire in case they are stale but not expired
client.credentialProvider.Expire()
client.credentialProvider = instancecreds.GetCredentials(client.config.External.Enabled())
return apierrors.NewRetriableError(apierrors.NewRetriable(true), attemptErr)
}
seelog.Debugf("Successfully retrieved Instance Identity Document")
return nil
})
if err != nil {
seelog.Errorf("Unable to get instance identity document: %v", err)
iidRetrieved = false
14 changes: 14 additions & 0 deletions agent/api/ecsclient/client_test.go
Original file line number Diff line number Diff line change
@@ -378,6 +378,13 @@ func TestRegisterContainerInstance(t *testing.T) {
name string
cfg *config.Config
}{
{
name: "retry GetDynamicData",
cfg: &config.Config{
Cluster: configuredCluster,
AWSRegion: "us-west-2",
},
},
{
name: "basic case",
cfg: &config.Config{
@@ -446,7 +453,14 @@ func TestRegisterContainerInstance(t *testing.T) {
if tc.cfg.NoIID {
expectedIID = ""
expectedIIDSig = ""
} else if tc.name == "retry GetDynamicData" {
gomock.InOrder(
mockEC2Metadata.EXPECT().GetDynamicData(ec2.InstanceIdentityDocumentResource).Return("", errors.New("fake unit test error")),
mockEC2Metadata.EXPECT().GetDynamicData(ec2.InstanceIdentityDocumentResource).Return(expectedIID, nil),
mockEC2Metadata.EXPECT().GetDynamicData(ec2.InstanceIdentityDocumentSignatureResource).Return(expectedIIDSig, nil),
)
} else {
//basic case
gomock.InOrder(
mockEC2Metadata.EXPECT().GetDynamicData(ec2.InstanceIdentityDocumentResource).Return(expectedIID, nil),
mockEC2Metadata.EXPECT().GetDynamicData(ec2.InstanceIdentityDocumentSignatureResource).Return(expectedIIDSig, nil),