diff --git a/docs/pages/server-access/guides/ec2-discovery.mdx b/docs/pages/server-access/guides/ec2-discovery.mdx index 3783beebd4e26..9713c97cd0539 100644 --- a/docs/pages/server-access/guides/ec2-discovery.mdx +++ b/docs/pages/server-access/guides/ec2-discovery.mdx @@ -393,6 +393,19 @@ error json: cannot unmarshal object into Go struct field DownloadContentPlugin.s It is likely that you're running an older SSM agent version. Upgrade to SSM agent version 3.1 or greater to resolve. +### `InvalidInstanceId: Instances [[i-123]] not in a valid state for account 456` + +The following problems can cause this error: +- The Discovery Service doesn't have permission to access the managed node. +- AWS Systems Manager Agent (SSM Agent) isn't running. Verify that SSM Agent is running. +- SSM Agent isn't registered with the SSM endpoint. Try reinstalling SSM Agent. +- The discovered instance does not have permission to receive SSM + commands. Verify that the instance includes the AmazonSSMManagedInstanceCore IAM policy. 
+ +See SSM RunCommand error codes and troubleshooting information in AWS documentation for more details: +- https://docs.aws.amazon.com/systems-manager/latest/userguide/troubleshooting-managed-instances.html +- https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors + ## Next steps - Read [Joining Nodes via AWS IAM Role](../../management/guides/joining-nodes-aws-iam.mdx) diff --git a/lib/srv/discovery/discovery.go b/lib/srv/discovery/discovery.go index 0c8ce38a11b5d..0d8d0b3d47bd7 100644 --- a/lib/srv/discovery/discovery.go +++ b/lib/srv/discovery/discovery.go @@ -18,12 +18,15 @@ package discovery import ( "context" + "errors" "fmt" "strings" "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ssm" "github.com/gravitational/trace" "github.com/sirupsen/logrus" @@ -331,7 +334,10 @@ func (s *Server) handleEC2Discovery() { s.Log.Debugf("EC2 instances discovered (AccountID: %s, Instances: %v), starting installation", instances.AccountID, genInstancesLogStr(instances.Instances)) if err := s.handleInstances(&instances); err != nil { - if trace.IsNotFound(err) { + var aErr awserr.Error + if errors.As(err, &aErr) && aErr.Code() == ssm.ErrCodeInvalidInstanceId { + s.Log.WithError(err).Error("SSM SendCommand failed with ErrCodeInvalidInstanceId. Make sure that the instances have AmazonSSMManagedInstanceCore policy assigned. Also check that SSM agent is running and registered with the SSM endpoint on that instance and try restarting or reinstalling it in case of issues. See https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors for more details.") + } else if trace.IsNotFound(err) { s.Log.Debug("All discovered EC2 instances are already part of the cluster.") } else { s.Log.WithError(err).Error("Failed to enroll discovered EC2 instances.")