diff --git a/aws/table_aws_tagging_resource.go b/aws/table_aws_tagging_resource.go index fbd5104f1..2cfa69cd1 100644 --- a/aws/table_aws_tagging_resource.go +++ b/aws/table_aws_tagging_resource.go @@ -2,6 +2,8 @@ package aws import ( "context" + "encoding/json" + "errors" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi" @@ -10,6 +12,7 @@ import ( "github.com/turbot/steampipe-plugin-sdk/v5/grpc/proto" "github.com/turbot/steampipe-plugin-sdk/v5/plugin" "github.com/turbot/steampipe-plugin-sdk/v5/plugin/transform" + "github.com/turbot/steampipe-plugin-sdk/v5/query_cache" ) func tableAwsTaggingResource(_ context.Context) *plugin.Table { @@ -27,6 +30,14 @@ func tableAwsTaggingResource(_ context.Context) *plugin.Table { List: &plugin.ListConfig{ Hydrate: listTaggingResources, Tags: map[string]string{"service": "tag", "action": "GetResources"}, + KeyColumns: []*plugin.KeyColumn{ + { + Name: "resource_types", + Require: plugin.Optional, + Operators: []string{"="}, + CacheMatch: query_cache.CacheMatchExact, + }, + }, }, GetMatrixItemFunc: SupportedRegionMatrix(AWS_TAGGING_SERVICE_ID), Columns: awsRegionalColumns([]*plugin.Column{ @@ -66,6 +77,12 @@ func tableAwsTaggingResource(_ context.Context) *plugin.Table { Type: proto.ColumnType_JSON, Transform: transform.FromField("Tags"), }, + { + Name: "resource_types", + Description: "The resource types to filter by. 
Accepts a JSON array of strings in formats: 'service' (e.g., \"ec2\") or 'service:resourceType' (e.g., \"ec2:instance\").", + Type: proto.ColumnType_JSON, + Transform: transform.FromQual("resource_types"), + }, /// Steampipe standard columns { @@ -100,48 +117,149 @@ func listTaggingResources(ctx context.Context, d *plugin.QueryData, _ *plugin.Hy return nil, err } - input := &resourcegroupstaggingapi.GetResourcesInput{ - ResourcesPerPage: aws.Int32(100), + // Parse resource type filters from query qualifiers + resourceTypes, err := parseResourceTypesFilter(d) + if err != nil { + return nil, err } - // Reduce the basic request limit down if the user has only requested a small number of rows - if d.QueryContext.Limit != nil { - limit := int32(*d.QueryContext.Limit) - if limit < *input.ResourcesPerPage { - if limit < 1 { - input.ResourcesPerPage = aws.Int32(1) - } else { - input.ResourcesPerPage = aws.Int32(limit) - } + // Split resource types into batches to respect API limits (max 100 per request) + batches := createResourceTypeBatches(resourceTypes) + + // Track seen resources to avoid duplicates across batches + seenResources := make(map[string]bool) + + // Process each batch of resource types + for _, batch := range batches { + if err := fetchResourcesForBatch(ctx, d, svc, batch, seenResources); err != nil { + return nil, err } + // Check if context has been cancelled or if the limit has been hit + if d.RowsRemaining(ctx) == 0 { + break + } + } + + return nil, nil +} + +// parseResourceTypesFilter extracts and validates resource types from query data +func parseResourceTypesFilter(d *plugin.QueryData) ([]string, error) { + resourceTypesValue := d.EqualsQuals["resource_types"].GetJsonbValue() + if resourceTypesValue == "" { + return nil, nil } + var resourceTypes []string + if err := json.Unmarshal([]byte(resourceTypesValue), &resourceTypes); err != nil { + return nil, errors.New("failed to parse 'resource_types' qualifier: value must be a JSON array of strings, 
e.g. [\"ec2:instance\", \"s3:bucket\", \"rds\"]") + } + + return resourceTypes, nil +} + +// createResourceTypeBatches splits resource types into batches for API requests +func createResourceTypeBatches(resourceTypes []string) [][]string { + const maxBatchSize = 100 // AWS Resource Groups Tagging API limit + + // If no resource types specified, make a single request without filters + if len(resourceTypes) == 0 { + return [][]string{{}} // Single empty batch for unfiltered requests + } + + // Split resource types into batches that don't exceed API limit + var batches [][]string + for i := 0; i < len(resourceTypes); i += maxBatchSize { + end := i + maxBatchSize + if end > len(resourceTypes) { + end = len(resourceTypes) + } + batches = append(batches, resourceTypes[i:end]) + } + + return batches +} + +// fetchResourcesForBatch fetches resources for a specific batch of resource types +func fetchResourcesForBatch(ctx context.Context, d *plugin.QueryData, svc *resourcegroupstaggingapi.Client, resourceTypes []string, seenResources map[string]bool) error { + // Build API input with pagination settings and resource type filters + input := buildGetResourcesInput(d, resourceTypes) + + // Create paginator to handle large result sets paginator := resourcegroupstaggingapi.NewGetResourcesPaginator(svc, input, func(o *resourcegroupstaggingapi.GetResourcesPaginatorOptions) { o.Limit = *input.ResourcesPerPage o.StopOnDuplicateToken = true }) + // List call - iterate through all pages for paginator.HasMorePages() { - // apply rate limiting + // Apply rate limiting d.WaitForListRateLimit(ctx) output, err := paginator.NextPage(ctx) if err != nil { plugin.Logger(ctx).Error("aws_tagging_resource.listTaggingResources", "api_error", err) - return nil, err + return err } - for _, resource := range output.ResourceTagMappingList { - d.StreamListItem(ctx, resource) + // Process the resources from this page + if err := processResourceBatch(ctx, d, output.ResourceTagMappingList, seenResources); 
err != nil { + return err + } - // Context can be cancelled due to manual cancellation or the limit has been hit - if d.RowsRemaining(ctx) == 0 { - return nil, nil + // Check if context has been cancelled or if the limit has been hit + if d.RowsRemaining(ctx) == 0 { + return nil + } + } + + return nil +} + +// buildGetResourcesInput creates the API input with appropriate pagination settings +func buildGetResourcesInput(d *plugin.QueryData, resourceTypes []string) *resourcegroupstaggingapi.GetResourcesInput { + input := &resourcegroupstaggingapi.GetResourcesInput{ + ResourcesPerPage: aws.Int32(100), + } + + // Add resource type filters if specified + if len(resourceTypes) > 0 { + input.ResourceTypeFilters = resourceTypes + } + + // Reduce the basic request limit down if the user has only requested a small number of rows + if d.QueryContext.Limit != nil { + limit := int32(*d.QueryContext.Limit) + if limit < *input.ResourcesPerPage { + if limit < 1 { + input.ResourcesPerPage = aws.Int32(1) + } else { + input.ResourcesPerPage = aws.Int32(limit) } } } - return nil, err + return input +} + +// processResourceBatch handles deduplication and streaming of resources +func processResourceBatch(ctx context.Context, d *plugin.QueryData, resources []types.ResourceTagMapping, seenResources map[string]bool) error { + for _, resource := range resources { + // Deduplicate based on ARN + arn := aws.ToString(resource.ResourceARN) + if seenResources[arn] { + continue // Skip duplicate + } + seenResources[arn] = true + + d.StreamListItem(ctx, resource) + + // Context can be cancelled due to manual cancellation or the limit has been hit + if d.RowsRemaining(ctx) == 0 { + return nil + } + } + return nil } //// HYDRATE FUNCTIONS @@ -156,6 +274,7 @@ func getTaggingResource(ctx context.Context, d *plugin.QueryData, _ *plugin.Hydr return nil, err } + // Build request for specific resource ARN param := &resourcegroupstaggingapi.GetResourcesInput{ ResourceARNList: []string{arn}, } @@ -166,6 
+285,7 @@ func getTaggingResource(ctx context.Context, d *plugin.QueryData, _ *plugin.Hydr return nil, err } + // Return the first resource if found if op != nil && len(op.ResourceTagMappingList) > 0 { return op.ResourceTagMappingList[0], nil } diff --git a/docs/tables/aws_tagging_resource.md b/docs/tables/aws_tagging_resource.md index 5380d5d5f..dd5197d2f 100644 --- a/docs/tables/aws_tagging_resource.md +++ b/docs/tables/aws_tagging_resource.md @@ -64,4 +64,204 @@ from aws_tagging_resource where compliance_status is not null; -``` \ No newline at end of file +``` + +### Filter Resources by Resource Types + +Filter results to retrieve only resources from specific AWS services or resource types. The `resource_types` column accepts a JSON array of strings in two formats: + +- `service` — All resources from a service (e.g., `"ec2"`) +- `service:resourceType` — Specific resource type (e.g., `"ec2:instance"`) + +#### Examples + +**Get tags for EC2 instances only:** +```sql+postgres +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["ec2:instance"]'; +``` + +```sql+sqlite +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["ec2:instance"]'; +``` + +**Get tags for multiple resource types:** +```sql+postgres +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["ec2:instance", "s3:bucket", "rds:db"]'; +``` + +```sql+sqlite +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["ec2:instance", "s3:bucket", "rds:db"]'; +``` + +**Get tags for all resources in specific services:** +```sql+postgres +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["lambda", "dynamodb"]'; +``` + +```sql+sqlite +select + name, + arn, + tags, + region +from + aws_tagging_resource +where + resource_types = '["lambda", "dynamodb"]'; +``` + +#### Common Resource Types + +| Category 
| Resource Types | +|----------|----------------| +| Compute | `["ec2:instance", "lambda:function", "ecs:cluster", "eks:cluster"]` | +| Storage | `["s3:bucket", "ec2:volume", "elasticfilesystem:file-system"]` | +| Database | `["rds:db", "rds:cluster", "dynamodb:table"]` | +| Network | `["ec2:vpc", "ec2:subnet", "ec2:security-group", "elasticloadbalancing:loadbalancer"]` | +| Security | `["iam:role", "iam:policy", "kms:key"]` | +| Monitoring | `["logs:log-group", "cloudwatch:alarm", "cloudwatch:dashboard"]` | + +### API Behavior and Resource Type Discovery + +#### Automatic Batching + +The AWS Resource Groups Tagging API limits each request to 100 resource type filters. Steampipe handles this limitation transparently: + +1. **Automatic splitting**: Resource type lists exceeding 100 items are automatically split into batches +2. **Sequential execution**: Each batch is processed as a separate API request +3. **Result aggregation**: All results are combined and deduplicated by ARN +4. **Seamless streaming**: Results are returned as a single, unified dataset + +This means you can query hundreds of resource types without manual batching: + +```sql+postgres +-- This works seamlessly even when the list exceeds the 100-item API limit +-- (abbreviated here; add as many resource types to the array as needed) +select name, arn, tags +from aws_tagging_resource +where resource_types = '[ + "ec2:instance", "ec2:volume", "ec2:snapshot", "ec2:image", "ec2:security-group", + "s3:bucket", "lambda:function", "rds:db", "rds:cluster", "dynamodb:table" +]'; +``` + +```sql+sqlite +-- This works seamlessly even when the list exceeds the 100-item API limit +-- (abbreviated here; add as many resource types to the array as needed) +select name, arn, tags +from aws_tagging_resource +where resource_types = '[ + "ec2:instance", "ec2:volume", "ec2:snapshot", "ec2:image", "ec2:security-group", + "s3:bucket", "lambda:function", "rds:db", "rds:cluster", "dynamodb:table" 
+]'; +``` + +#### Discovering Available Resource Types + +To find which resource types you can query, use these approaches: + +**1. Query AWS Resource Explorer for supported types:** +```sql+postgres +-- List all available resource types +select + service, + resource_type, + service || ':' || resource_type as full_resource_type +from + aws_resource_explorer_supported_resource_type +order by + service, resource_type; +``` + +```sql+sqlite +-- List all available resource types +select + service, + resource_type, + service || ':' || resource_type as full_resource_type +from + aws_resource_explorer_supported_resource_type +order by + service, resource_type; +``` + +**2. Find resource types for a specific service:** +```sql+postgres +-- Example: Find all EC2 resource types +select + service, + resource_type, + service || ':' || resource_type as full_resource_type +from + aws_resource_explorer_supported_resource_type +where + service = 'ec2' +order by + resource_type; +``` + +```sql+sqlite +-- Example: Find all EC2 resource types +select + service, + resource_type, + service || ':' || resource_type as full_resource_type +from + aws_resource_explorer_supported_resource_type +where + service = 'ec2' +order by + resource_type; +``` + +#### Important Notes + +- **JSON array format**: Resource types must always be specified as a JSON array, even for single values: `'["ec2:instance"]'` +- **Service vs. 
resource type**: Use `"ec2"` to query all EC2 resources, or `"ec2:instance"` for specific types +- **Case sensitivity**: Resource type filters are case-sensitive and must match AWS conventions +- **Performance**: While batching is automatic, querying many resource types may take longer due to multiple API calls +- **Regional data**: The query runs once per region, so results are returned for each region configured in your connection +- +For the complete list of supported services and resource types, refer to: +- [AWS Resource Groups Tagging API supported services](https://docs.aws.amazon.com/resourcegroupstagging/latest/APIReference/supported-services.html) +- The [`aws_resource_explorer_supported_resource_type`](https://hub.steampipe.io/plugins/turbot/aws/tables/aws_resource_explorer_supported_resource_type) table in your Steampipe instance