diff --git a/docs/pages/server-access/guides/ec2-discovery.mdx b/docs/pages/server-access/guides/ec2-discovery.mdx index b4ec741b7b0cc..fd34f64dfa645 100644 --- a/docs/pages/server-access/guides/ec2-discovery.mdx +++ b/docs/pages/server-access/guides/ec2-discovery.mdx @@ -397,6 +397,19 @@ error json: cannot unmarshal object into Go struct field DownloadContentPlugin.s It is likely that you're running an older SSM agent version. Upgrade to SSM agent version 3.1 or greater to resolve. +### `InvalidInstanceId: Instances [[i-123]] not in a valid state for account 456` + +The following problems can cause this error: +- The Discovery Service doesn't have permission to access the managed node. +- AWS Systems Manager Agent (SSM Agent) isn't running. Verify that SSM Agent is running. +- SSM Agent isn't registered with the SSM endpoint. Try reinstalling SSM Agent. +- The discovered instance does not have permission to receive SSM + commands. Verify that the instance includes the AmazonSSMManagedInstanceCore IAM policy. 
+ +See SSM RunCommand error codes and troubleshooting information in AWS documentation for more details: +- https://docs.aws.amazon.com/systems-manager/latest/userguide/troubleshooting-managed-instances.html +- https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors + ## Next steps - Read [Joining Nodes via AWS IAM diff --git a/lib/srv/discovery/discovery.go b/lib/srv/discovery/discovery.go index 0d02a056a3885..bf58b64788e54 100644 --- a/lib/srv/discovery/discovery.go +++ b/lib/srv/discovery/discovery.go @@ -25,7 +25,9 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v3" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ssm" "github.com/gravitational/trace" "github.com/sirupsen/logrus" "golang.org/x/exp/slices" @@ -416,7 +418,10 @@ func (s *Server) handleEC2Discovery() { instances.AccountID, genEC2InstancesLogStr(ec2Instances.Instances)) if err := s.handleEC2Instances(ec2Instances); err != nil { - if trace.IsNotFound(err) { + var aErr awserr.Error + if errors.As(err, &aErr) && aErr.Code() == ssm.ErrCodeInvalidInstanceId { + s.Log.WithError(err).Error("SSM SendCommand failed with ErrCodeInvalidInstanceId. Make sure that the instances have AmazonSSMManagedInstanceCore policy assigned. Also check that SSM agent is running and registered with the SSM endpoint on that instance and try restarting or reinstalling it in case of issues. See https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors for more details.") + } else if trace.IsNotFound(err) { s.Log.Debug("All discovered EC2 instances are already part of the cluster.") } else { s.Log.WithError(err).Error("Failed to enroll discovered EC2 instances.")