Skip to content

Commit

Permalink
s3_object - Rework list_objects handling
Browse files Browse the repository at this point in the history
  • Loading branch information
tremble committed Oct 9, 2024
1 parent abdf221 commit e061639
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 43 deletions.
6 changes: 6 additions & 0 deletions changelogs/fragments/1953-max_tokens.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
deprecated_features:
- s3_object - Support for ``mode=list`` has been deprecated. ``amazon.aws.s3_object_info`` should be used instead (https://github.com/ansible-collections/amazon.aws/pull/2328).
bugfixes:
- s3_object - Fixed an issue where ``max_keys`` was not respected (https://github.com/ansible-collections/amazon.aws/pull/2328).
minor_changes:
- s3_object_info - Added support for the ``max_keys`` and ``marker`` parameters (https://github.com/ansible-collections/amazon.aws/pull/2328).
17 changes: 17 additions & 0 deletions plugins/module_utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,20 @@ def list_bucket_inventory_configurations(client, bucket_name):
entries.extend(response["InventoryConfigurationList"])
next_token = response.get("NextToken")
return entries


@AWSRetry.jittered_backoff()
def _list_objects_v2(client, **params):
    """List the objects in a bucket using the S3 ``ListObjectsV2`` API.

    Parameters whose value is ``None`` are dropped before any request is made.

    * Without ``MaxKeys`` every page is fetched through the paginator and the
      pages are merged with ``build_full_result()``.
    * With ``MaxKeys`` of at most 1000 a single ``list_objects_v2`` request is
      made and its response is returned unchanged.
    * With ``MaxKeys`` above 1000 the paginator is used with ``MaxItems`` so
      that the total number of returned objects is capped at ``MaxKeys``.

    :param client: boto3 S3 client.
    :param params: keyword arguments for ``list_objects_v2`` (``Bucket``,
        ``Prefix``, ``StartAfter``, ``MaxKeys``, ...).
    :returns: a dict in which the listed objects, if any, are under ``Contents``.
    """
    # A single ListObjectsV2 response never contains more than this many keys.
    single_request_limit = 1000

    params = {k: v for k, v in params.items() if v is not None}
    max_keys = params.get("MaxKeys")

    # For practical purposes the paginator ignores MaxKeys as a total limit (it keeps
    # requesting further pages), so when the requested amount fits in one response we
    # call list_objects_v2 directly rather than re-use the paginator.
    if max_keys is not None and max_keys <= single_request_limit:
        return client.list_objects_v2(**params)

    paginator = client.get_paginator("list_objects_v2")
    if max_keys is not None:
        # More keys were requested than one response can hold: paginate, but let the
        # paginator stop once MaxKeys objects have been collected.
        del params["MaxKeys"]
        params["PaginationConfig"] = {"MaxItems": max_keys}
    return paginator.paginate(**params).build_full_result()


def list_bucket_object_keys(client, bucket, prefix=None, max_keys=None, start_after=None):
    """Return the keys of the objects stored in ``bucket``.

    :param client: boto3 S3 client.
    :param bucket: name of the bucket to list.
    :param prefix: optional ``Prefix`` forwarded to the listing request.
    :param max_keys: optional ``MaxKeys`` forwarded to the listing request.
    :param start_after: optional ``StartAfter`` forwarded to the listing request.
    :returns: list of the ``Key`` values found under ``Contents`` in the listing
        response; an empty list when the response has no ``Contents``.
    """
    listing = _list_objects_v2(
        client,
        Bucket=bucket,
        Prefix=prefix,
        StartAfter=start_after,
        MaxKeys=max_keys,
    )
    objects = listing.get("Contents", [])
    return [entry["Key"] for entry in objects]
41 changes: 18 additions & 23 deletions plugins/modules/s3_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,11 @@
- Specifies the key to start with when using list mode. Object keys are returned in
alphabetical order, starting with key after the marker in order.
type: str
default: ''
max_keys:
description:
- Max number of results to return when O(mode=list), set this if you want to retrieve fewer
than the default 1000 keys.
- Max number of results to return when O(mode=list).
- When not set, B(all) keys will be returned.
- Ignored when O(mode) is not V(list).
default: 1000
type: int
metadata:
description:
Expand All @@ -84,6 +82,8 @@
- 'V(copy): copy object that is already stored in another bucket'
- Support for creating and deleting buckets was removed in release 6.0.0.
To create and manage the bucket itself please use the M(amazon.aws.s3_bucket) module.
- Support for V(list) has been deprecated and will be removed in a release after 2026-11-01.
The M(amazon.aws.s3_object_info) module should be used instead of V(list).
required: true
choices: ['get', 'put', 'create', 'geturl', 'getstr', 'delobj', 'list', 'copy']
type: str
Expand Down Expand Up @@ -444,6 +444,7 @@
from ansible_collections.amazon.aws.plugins.module_utils.s3 import HAS_MD5
from ansible_collections.amazon.aws.plugins.module_utils.s3 import calculate_etag
from ansible_collections.amazon.aws.plugins.module_utils.s3 import calculate_etag_content
from ansible_collections.amazon.aws.plugins.module_utils.s3 import list_bucket_object_keys
from ansible_collections.amazon.aws.plugins.module_utils.s3 import s3_extra_params
from ansible_collections.amazon.aws.plugins.module_utils.s3 import validate_bucket_name
from ansible_collections.amazon.aws.plugins.module_utils.tagging import ansible_dict_to_boto3_tag_list
Expand Down Expand Up @@ -579,25 +580,9 @@ def bucket_check(module, s3, bucket, validate=True):
)


@AWSRetry.jittered_backoff()
def paginated_list(s3, **pagination_params):
    """Yield the key of every object returned by the ``list_objects_v2`` paginator.

    :param s3: boto3 S3 client.
    :param pagination_params: keyword arguments passed straight to ``paginate()``.
    :returns: generator of object keys, page by page.
    """
    # NOTE(review): this is a generator function, so the retry decorator presumably
    # only wraps creation of the generator, not the requests made while iterating
    # over it - confirm against AWSRetry before relying on retries here.
    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(**pagination_params)
    for page in pages:
        # Pages without any objects carry no "Contents" entry.
        yield from (entry["Key"] for entry in page.get("Contents", []))


def list_keys(s3, bucket, prefix=None, marker=None, max_keys=None):
pagination_params = {
"Bucket": bucket,
"Prefix": prefix,
"StartAfter": marker,
"MaxKeys": max_keys,
}
pagination_params = {k: v for k, v in pagination_params.items() if v}

try:
return list(paginated_list(s3, **pagination_params))
return list_bucket_object_keys(s3, bucket, prefix=prefix, start_after=marker, max_keys=max_keys)
except (
botocore.exceptions.ClientError,
botocore.exceptions.BotoCoreError,
Expand Down Expand Up @@ -1487,8 +1472,8 @@ def main():
encryption_mode=dict(choices=["AES256", "aws:kms"], default="AES256"),
expiry=dict(default=600, type="int", aliases=["expiration"]),
headers=dict(type="dict"),
marker=dict(default=""),
max_keys=dict(default=1000, type="int", no_log=False),
marker=dict(),
max_keys=dict(type="int", no_log=False),
metadata=dict(type="dict"),
mode=dict(choices=valid_modes, required=True),
sig_v4=dict(default=True, type="bool"),
Expand Down Expand Up @@ -1538,6 +1523,16 @@ def main():
mutually_exclusive=[["content", "content_base64", "src"]],
)

# mode=list is deprecated in favour of amazon.aws.s3_object_info. The removal date
# quoted in the message must agree with the ``date`` handed to module.deprecate().
if module.params.get("mode") == "list":
    module.deprecate(
        (
            "Support for 'list' mode has been deprecated and will be removed in a release after "
            "2026-11-01. Please use the amazon.aws.s3_object_info module instead."
        ),
        date="2026-11-01",
        collection_name="amazon.aws",
    )

endpoint_url = module.params.get("endpoint_url")
dualstack = module.params.get("dualstack")

Expand Down
41 changes: 21 additions & 20 deletions plugins/modules/s3_object_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@
type: list
elements: str
choices: ['ETag', 'Checksum', 'ObjectParts', 'StorageClass', 'ObjectSize']
marker:
description:
- Specifies the object key to start with. Object keys are returned in alphabetical order, starting with the
first key after the marker.
type: str
version_added: 9.0.0
max_keys:
description:
- Max number of results to return. Set this if you want to retrieve only partial results.
type: int
version_added: 9.0.0
notes:
- Support for the E(S3_URL) environment variable has been
deprecated and will be removed in a release after 2024-12-01, please use the O(endpoint_url) parameter
Expand Down Expand Up @@ -441,6 +452,7 @@
from ansible_collections.amazon.aws.plugins.module_utils.botocore import is_boto3_error_code
from ansible_collections.amazon.aws.plugins.module_utils.modules import AnsibleAWSModule
from ansible_collections.amazon.aws.plugins.module_utils.retries import AWSRetry
from ansible_collections.amazon.aws.plugins.module_utils.s3 import list_bucket_object_keys
from ansible_collections.amazon.aws.plugins.module_utils.s3 import s3_extra_params
from ansible_collections.amazon.aws.plugins.module_utils.tagging import boto3_tag_list_to_ansible_dict

Expand Down Expand Up @@ -622,30 +634,17 @@ def get_object(connection, bucket_name, object_name):
return result


@AWSRetry.jittered_backoff(retries=10)
def _list_bucket_objects(connection, **params):
paginator = connection.get_paginator("list_objects")
return paginator.paginate(**params).build_full_result()


def list_bucket_objects(connection, module, bucket_name):
params = {}
params["Bucket"] = bucket_name

result = []
list_objects_response = {}

try:
list_objects_response = _list_bucket_objects(connection, **params)
keys = list_bucket_object_keys(
connection,
bucket=bucket_name,
max_keys=module.params["max_keys"],
start_after=module.params["marker"],
)
except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
module.fail_json_aws(e, msg="Failed to list bucket objects.")

if len(list_objects_response) != 0:
# convert to snake_case
for response_list_item in list_objects_response.get("Contents", []):
result.append(response_list_item["Key"])

return result
return keys


def bucket_check(
Expand Down Expand Up @@ -691,6 +690,8 @@ def main():
object_name=dict(type="str"),
dualstack=dict(default=False, type="bool"),
ceph=dict(default=False, type="bool", aliases=["rgw"]),
marker=dict(),
# Use the named "int" argument type (as documented for this option) rather than the
# builtin int callable, which would silently coerce values such as 1.5 or True.
max_keys=dict(type="int", no_log=False),
)

required_if = [
Expand Down
14 changes: 14 additions & 0 deletions tests/integration/targets/s3_object/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,20 @@
- "'delete.txt' in result.s3_keys"
- result.msg == "LIST operation complete"

- name: test list to get just 1 object from the bucket
amazon.aws.s3_object:
bucket: "{{ bucket_name }}"
mode: list
max_keys: 1
retries: 3
delay: 3
register: result

- ansible.builtin.assert:
that:
- "(result.s3_keys | length) == 1"
- result.msg == "LIST operation complete"

- name: test delobj to just delete an object in the bucket
amazon.aws.s3_object:
bucket: "{{ bucket_name }}"
Expand Down

0 comments on commit e061639

Please sign in to comment.