diff --git a/docs/pages/server-access/guides/ec2-discovery.mdx b/docs/pages/server-access/guides/ec2-discovery.mdx index f34fc2b3ce6a0..7aeb3d678bc55 100644 --- a/docs/pages/server-access/guides/ec2-discovery.mdx +++ b/docs/pages/server-access/guides/ec2-discovery.mdx @@ -399,6 +399,19 @@ error json: cannot unmarshal object into Go struct field DownloadContentPlugin.s It is likely that you're running an older SSM agent version. Upgrade to SSM agent version 3.1 or greater to resolve. +### `InvalidInstanceId: Instances [[i-123]] not in a valid state for account 456` + +The following problems can cause this error: +- The Discovery Service doesn't have permission to access the managed node. +- AWS Systems Manager Agent (SSM Agent) isn't running. Verify that SSM Agent is running. +- SSM Agent isn't registered with the SSM endpoint. Try reinstalling SSM Agent. +- The discovered instance does not have permission to receive SSM + commands. Verify that the instance includes the AmazonSSMManagedInstanceCore IAM policy. 
+ +See SSM RunCommand error codes and troubleshooting information in AWS documentation for more details: +- https://docs.aws.amazon.com/systems-manager/latest/userguide/troubleshooting-managed-instances.html +- https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors + ## Next steps - Read [Joining Nodes via AWS IAM diff --git a/lib/srv/discovery/discovery.go b/lib/srv/discovery/discovery.go index 461b957c55ce2..906d6cf41b10d 100644 --- a/lib/srv/discovery/discovery.go +++ b/lib/srv/discovery/discovery.go @@ -25,7 +25,9 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v3" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ssm" "github.com/gravitational/trace" "github.com/sirupsen/logrus" "golang.org/x/exp/slices" @@ -404,7 +406,10 @@ func (s *Server) handleEC2Discovery() { instances.AccountID, genEC2InstancesLogStr(ec2Instances.Instances)) if err := s.handleEC2Instances(ec2Instances); err != nil { - if trace.IsNotFound(err) { + var aErr awserr.Error + if errors.As(err, &aErr) && aErr.Code() == ssm.ErrCodeInvalidInstanceId { + s.Log.WithError(err).Error("SSM SendCommand failed with ErrCodeInvalidInstanceId. Make sure that the instances have AmazonSSMManagedInstanceCore policy assigned. Also check that SSM agent is running and registered with the SSM endpoint on that instance and try restarting or reinstalling it in case of issues. See https://docs.aws.amazon.com/systems-manager/latest/APIReference/API_SendCommand.html#API_SendCommand_Errors for more details.") + } else if trace.IsNotFound(err) { s.Log.Debug("All discovered EC2 instances are already part of the cluster.") } else { s.Log.WithError(err).Error("Failed to enroll discovered EC2 instances.")