From d9eb4e611b859969c3e13e6dd9f43b8439424e25 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Mon, 12 Jun 2017 09:31:13 -0700 Subject: [PATCH 1/3] Implement seek whence param for zero length files Bug: https://github.com/aws/aws-cli/issues/2403 Bug-Ubuntu: https://bugs.launchpad.net/bugs/1696800 As described in the bug reports referenced above, trying to copy a zero-length file to S3 fails (with some version combinations of python3, awscli, s3transfer, and requests) like this: $ aws s3 cp emptyfile s3://somebucket upload failed: ./emptyfile to s3://somebucket/emptyfile seek() takes 2 positional arguments but 3 were given ... due to the incomplete implementation of seek() -- missing the optional 'whence' argument -- in s3transfer's ReadFileChunk method. This patch adds the whence argument to all of s3transfer's seek implementations. --- s3transfer/__init__.py | 22 ++++++++++++++++++---- s3transfer/upload.py | 4 ++-- s3transfer/utils.py | 26 ++++++++++++++++++++------ 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/s3transfer/__init__.py b/s3transfer/__init__.py index 56f64419..ac49d8bc 100644 --- a/s3transfer/__init__.py +++ b/s3transfer/__init__.py @@ -276,12 +276,26 @@ def enable_callback(self): def disable_callback(self): self._callback_enabled = False - def seek(self, where): - self._fileobj.seek(self._start_byte + where) + def seek(self, where, whence=0): + if whence not in (0, 1, 2): + # Mimic io's error for invalid whence values + raise ValueError( + "invalid whence (%s, should be 0, 1 or 2)" % whence) + + # Recalculate where based on chunk attributes so seek from file + # start (whence=0) is always used + where += self._start_byte + if whence == 1: + where += self._amount_read + elif whence == 2: + where += self._size + + self._fileobj.seek(where) if self._callback is not None and self._callback_enabled: # To also rewind the callback() for an accurate progress report - self._callback(where - self._amount_read) - self._amount_read = where + amount = where - self._start_byte - self._amount_read + self._callback(amount) + self._amount_read = where - self._start_byte def close(self): self._fileobj.close() diff --git a/s3transfer/upload.py b/s3transfer/upload.py index a6b22a6c..e333a7e9 100644 --- a/s3transfer/upload.py +++ b/s3transfer/upload.py @@ -85,8 +85,8 @@ def read(self, amount=None): raise self._transfer_coordinator.exception return self._fileobj.read(amount) - def seek(self, where): - self._fileobj.seek(where) + def seek(self, where, whence=0): + self._fileobj.seek(where, whence) def tell(self): return self._fileobj.tell() diff --git a/s3transfer/utils.py b/s3transfer/utils.py index 82683d25..ff8374eb 100644 --- a/s3transfer/utils.py +++ b/s3transfer/utils.py @@ -363,9 +363,9 @@ def write(self, data): self._open_if_needed() self._fileobj.write(data) - def seek(self, where): + def seek(self, where, whence=0): self._open_if_needed() - self._fileobj.seek(where) + self._fileobj.seek(where, whence) def tell(self): if self._fileobj is None: @@ -499,13 +499,27 @@ def enable_callback(self): def disable_callback(self): self._callbacks_enabled = False - def seek(self, where): - self._fileobj.seek(self._start_byte + where) + def seek(self, where, whence=0): + if whence not in (0, 1, 2): + # Mimic io's error for invalid whence values + raise ValueError( + "invalid whence (%s, should be 0, 1 or 2)" % whence) + + # Recalculate where based on chunk attributes so seek from file + # start (whence=0) is always used + where += self._start_byte + if whence == 1: + where += self._amount_read + elif whence == 2: + where += self._size + + self._fileobj.seek(where) if self._callbacks is not None and self._callbacks_enabled: # To also rewind the callback() for an accurate progress report + amount = where - self._start_byte - self._amount_read invoke_progress_callbacks( - self._callbacks, bytes_transferred=where - self._amount_read) - self._amount_read = where + self._callbacks, bytes_transferred=amount) + self._amount_read = where - self._start_byte def close(self): if self._close_callbacks is not None and self._callbacks_enabled: From eecc4009f8191d384b40ef859b33960d7d80b731 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Wed, 24 Jan 2018 11:15:30 -0600 Subject: [PATCH 2/3] Test ReadFileChunk seek with whence != 0 Check that seek from current position (whence = 1) or seek from end (whence = 2) work properly with both ReadFileChunk implementations. --- tests/unit/test_s3transfer.py | 6 ++++++ tests/unit/test_utils.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/tests/unit/test_s3transfer.py b/tests/unit/test_s3transfer.py index 4715be20..3600cd86 100644 --- a/tests/unit/test_s3transfer.py +++ b/tests/unit/test_s3transfer.py @@ -183,6 +183,12 @@ def test_tell_and_seek(self): self.assertEqual(chunk.tell(), 3) chunk.seek(0) self.assertEqual(chunk.tell(), 0) + chunk.seek(1, whence=1) + self.assertEqual(chunk.tell(), 1) + chunk.seek(-1, whence=1) + self.assertEqual(chunk.tell(), 0) + chunk.seek(-1, whence=2) + self.assertEqual(chunk.tell(), 2) def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 733fed04..27cb9e56 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -493,6 +493,12 @@ def test_tell_and_seek(self): self.assertEqual(chunk.tell(), 3) chunk.seek(0) self.assertEqual(chunk.tell(), 0) + chunk.seek(1, whence=1) + self.assertEqual(chunk.tell(), 1) + chunk.seek(-1, whence=1) + self.assertEqual(chunk.tell(), 0) + chunk.seek(-1, whence=2) + self.assertEqual(chunk.tell(), 2) def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') From fe4f945b1eedf425e02ec368dd064bf26e4e6e5d Mon Sep 17 00:00:00 2001 From: Nate Prewitt Date: Mon, 5 Apr 2021 16:40:43 -0700 Subject: [PATCH 3/3] Fix seek bounding issues --- .../bugfix-ReadFileChunk-16290.json | 5 + s3transfer/__init__.py | 22 +- s3transfer/bandwidth.py | 4 +- s3transfer/utils.py | 15 +- tests/functional/test_manager.py | 4 +- tests/unit/test_bandwidth.py | 2 +- tests/unit/test_download.py | 2 +- tests/unit/test_s3transfer.py | 6 - tests/unit/test_utils.py | 193 ++++++++++++++++++ 9 files changed, 218 insertions(+), 35 deletions(-) create mode 100644 .changes/next-release/bugfix-ReadFileChunk-16290.json diff --git a/.changes/next-release/bugfix-ReadFileChunk-16290.json b/.changes/next-release/bugfix-ReadFileChunk-16290.json new file mode 100644 index 00000000..48ef348a --- /dev/null +++ b/.changes/next-release/bugfix-ReadFileChunk-16290.json @@ -0,0 +1,5 @@ +{ + "type": "bugfix", + "category": "ReadFileChunk", + "description": "Fix seek behavior in ReadFileChunk class" +} diff --git a/s3transfer/__init__.py b/s3transfer/__init__.py index ac49d8bc..56f64419 100644 --- a/s3transfer/__init__.py +++ b/s3transfer/__init__.py @@ -276,26 +276,12 @@ def enable_callback(self): def disable_callback(self): self._callback_enabled = False - def seek(self, where, whence=0): - if whence not in (0, 1, 2): - # Mimic io's error for invalid whence values - raise ValueError( - "invalid whence (%s, should be 0, 1 or 2)" % whence) - - # Recalculate where based on chunk attributes so seek from file - # start (whence=0) is always used - where += self._start_byte - if whence == 1: - where += self._amount_read - elif whence == 2: - where += self._size - - self._fileobj.seek(where) + def seek(self, where): + self._fileobj.seek(self._start_byte + where) if self._callback is not None and self._callback_enabled: # To also rewind the callback() for an accurate progress report - amount = where - self._start_byte - self._amount_read - self._callback(amount) - self._amount_read = where - self._start_byte + self._callback(where - self._amount_read) + self._amount_read = where def close(self): self._fileobj.close() diff --git a/s3transfer/bandwidth.py b/s3transfer/bandwidth.py index 9c0b0bcc..07f50963 100644 --- a/s3transfer/bandwidth.py +++ b/s3transfer/bandwidth.py @@ -180,8 +180,8 @@ def signal_not_transferring(self): """Signal that data being read is not being transferred to S3""" self.disable_bandwidth_limiting() - def seek(self, where): - self._fileobj.seek(where) + def seek(self, where, whence=0): + self._fileobj.seek(where, whence) def tell(self): return self._fileobj.tell() diff --git a/s3transfer/utils.py b/s3transfer/utils.py index ff8374eb..67f7e028 100644 --- a/s3transfer/utils.py +++ b/s3transfer/utils.py @@ -425,6 +425,8 @@ def __init__(self, fileobj, chunk_size, full_file_size, self._size = self._calculate_file_size( self._fileobj, requested_size=chunk_size, start_byte=self._start_byte, actual_file_size=full_file_size) + # _amount_read represents the position in the chunk and may exceed + # the chunk size, but won't allow reads out of bounds. self._amount_read = 0 self._callbacks = callbacks if callbacks is None: @@ -473,10 +475,11 @@ def _calculate_file_size(self, fileobj, requested_size, start_byte, return min(max_chunk_size, requested_size) def read(self, amount=None): + amount_left = max(self._size - self._amount_read, 0) if amount is None: - amount_to_read = self._size - self._amount_read + amount_to_read = amount_left else: - amount_to_read = min(self._size - self._amount_read, amount) + amount_to_read = min(amount_left, amount) data = self._fileobj.read(amount_to_read) self._amount_read += len(data) if self._callbacks is not None and self._callbacks_enabled: @@ -513,13 +516,15 @@ def seek(self, where, whence=0): elif whence == 2: where += self._size - self._fileobj.seek(where) + self._fileobj.seek(max(where, self._start_byte)) if self._callbacks is not None and self._callbacks_enabled: # To also rewind the callback() for an accurate progress report - amount = where - self._start_byte - self._amount_read + bounded_where = max(min(where - self._start_byte, self._size), 0) + bounded_amount_read = min(self._amount_read, self._size) + amount = bounded_where - bounded_amount_read invoke_progress_callbacks( self._callbacks, bytes_transferred=amount) - self._amount_read = where - self._start_byte + self._amount_read = max(where - self._start_byte, 0) def close(self): if self._close_callbacks is not None and self._callbacks_enabled: diff --git a/tests/functional/test_manager.py b/tests/functional/test_manager.py index ef579eba..61a69903 100644 --- a/tests/functional/test_manager.py +++ b/tests/functional/test_manager.py @@ -10,7 +10,7 @@ # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -from io import RawIOBase +from io import BytesIO from botocore.awsrequest import create_request_object import mock @@ -27,7 +27,7 @@ class ArbitraryException(Exception): pass -class SignalTransferringBody(RawIOBase): +class SignalTransferringBody(BytesIO): """A mocked body with the ability to signal when transfers occur""" def __init__(self): super(SignalTransferringBody, self).__init__() diff --git a/tests/unit/test_bandwidth.py b/tests/unit/test_bandwidth.py index 4f35a7a8..099ca856 100644 --- a/tests/unit/test_bandwidth.py +++ b/tests/unit/test_bandwidth.py @@ -186,7 +186,7 @@ def test_seek(self): stream.seek(1) self.assertEqual( mock_fileobj.seek.call_args_list, - [mock.call(1)] + [mock.call(1, 0)] ) def test_tell(self): diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py index 19f274c0..2e39c8df 100644 --- a/tests/unit/test_download.py +++ b/tests/unit/test_download.py @@ -58,7 +58,7 @@ def __init__(self): self._pos = 0 self.writes = [] - def seek(self, pos): + def seek(self, pos, whence=0): self._pos = pos def write(self, data): diff --git a/tests/unit/test_s3transfer.py b/tests/unit/test_s3transfer.py index 3600cd86..4715be20 100644 --- a/tests/unit/test_s3transfer.py +++ b/tests/unit/test_s3transfer.py @@ -183,12 +183,6 @@ def test_tell_and_seek(self): self.assertEqual(chunk.tell(), 3) chunk.seek(0) self.assertEqual(chunk.tell(), 0) - chunk.seek(1, whence=1) - self.assertEqual(chunk.tell(), 1) - chunk.seek(-1, whence=1) - self.assertEqual(chunk.tell(), 0) - chunk.seek(-1, whence=2) - self.assertEqual(chunk.tell(), 2) def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 27cb9e56..28fc1bd0 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -500,6 +500,114 @@ def test_tell_and_seek(self): chunk.seek(-1, whence=2) self.assertEqual(chunk.tell(), 2) + def test_tell_and_seek_boundaries(self): + # Test to ensure ReadFileChunk behaves the same as the + # Python standard library around seeking and reading out + # of bounds in a file object. + data = b'abcdefghij12345678klmnopqrst' + start_pos = 10 + chunk_size = 8 + + # Create test file + filename = os.path.join(self.tempdir, 'foo') + with open(filename, 'wb') as f: + f.write(data) + + # ReadFileChunk should be a substring of only numbers + file_objects = [ + ReadFileChunk.from_filename( + filename, start_byte=start_pos, chunk_size=chunk_size + )] + + # Uncomment next line to validate we match Python's io.BytesIO + # file_objects.append(io.BytesIO(data[start_pos:start_pos+chunk_size])) + + for obj in file_objects: + self._assert_whence_start_behavior(obj) + self._assert_whence_end_behavior(obj) + self._assert_whence_relative_behavior(obj) + self._assert_boundary_behavior(obj) + + def _assert_whence_start_behavior(self, file_obj): + self.assertEqual(file_obj.tell(), 0) + + file_obj.seek(1, 0) + self.assertEqual(file_obj.tell(), 1) + + file_obj.seek(1) + self.assertEqual(file_obj.tell(), 1) + self.assertEqual(file_obj.read(), b'2345678') + + file_obj.seek(3, 0) + self.assertEqual(file_obj.tell(), 3) + + file_obj.seek(0, 0) + self.assertEqual(file_obj.tell(), 0) + + def _assert_whence_relative_behavior(self, file_obj): + self.assertEqual(file_obj.tell(), 0) + + file_obj.seek(2, 1) + self.assertEqual(file_obj.tell(), 2) + + file_obj.seek(1, 1) + self.assertEqual(file_obj.tell(), 3) + self.assertEqual(file_obj.read(), b'45678') + + file_obj.seek(20, 1) + self.assertEqual(file_obj.tell(), 28) + + file_obj.seek(-30, 1) + self.assertEqual(file_obj.tell(), 0) + self.assertEqual(file_obj.read(), b'12345678') + + file_obj.seek(-8, 1) + self.assertEqual(file_obj.tell(), 0) + + def _assert_whence_end_behavior(self, file_obj): + self.assertEqual(file_obj.tell(), 0) + + file_obj.seek(-1, 2) + self.assertEqual(file_obj.tell(), 7) + + file_obj.seek(1, 2) + self.assertEqual(file_obj.tell(), 9) + + file_obj.seek(3, 2) + self.assertEqual(file_obj.tell(), 11) + self.assertEqual(file_obj.read(), b'') + + file_obj.seek(-15, 2) + self.assertEqual(file_obj.tell(), 0) + self.assertEqual(file_obj.read(), b'12345678') + + file_obj.seek(-8, 2) + self.assertEqual(file_obj.tell(), 0) + + def _assert_boundary_behavior(self, file_obj): + # Verify we're at the start + self.assertEqual(file_obj.tell(), 0) + + # Verify we can't move backwards beyond start of file + file_obj.seek(-10, 1) + self.assertEqual(file_obj.tell(), 0) + + # Verify we *can* move after end of file, but return nothing + file_obj.seek(10, 2) + self.assertEqual(file_obj.tell(), 18) + self.assertEqual(file_obj.read(), b'') + self.assertEqual(file_obj.read(10), b'') + + # Verify we can partially rewind + file_obj.seek(-12, 1) + self.assertEqual(file_obj.tell(), 6) + self.assertEqual(file_obj.read(), b'78') + self.assertEqual(file_obj.tell(), 8) + + # Verify we can rewind to start + file_obj.seek(0) + self.assertEqual(file_obj.tell(), 0) + def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: @@ -560,6 +668,91 @@ def test_callback_will_also_be_triggered_by_seek(self): chunk.read(2) self.assertEqual(self.amounts_seen, [2, -2, 2, -1, 2]) + def test_callback_triggered_by_out_of_bound_seeks(self): + data = b'abcdefghij1234567890klmnopqr' + + # Create test file + filename = os.path.join(self.tempdir, 'foo') + with open(filename, 'wb') as f: + f.write(data) + chunk = ReadFileChunk.from_filename( + filename, start_byte=10, chunk_size=10, + callbacks=[self.callback]) + + # Seek calls that generate "0" progress are skipped by + # invoke_progress_callbacks and won't appear in the list. + expected_callback_prog = [10, -5, 5, -1, 1, -1, 1, -5, 5, -10] + + self._assert_out_of_bound_start_seek(chunk, expected_callback_prog) + self._assert_out_of_bound_relative_seek(chunk, expected_callback_prog) + self._assert_out_of_bound_end_seek(chunk, expected_callback_prog) + + def _assert_out_of_bound_start_seek(self, chunk, expected): + # clear amounts_seen + self.amounts_seen = [] + self.assertEqual(self.amounts_seen, []) + + # (position, change) + chunk.seek(20) # (20, 10) + chunk.seek(5) # (5, -5) + chunk.seek(20) # (20, 5) + chunk.seek(9) # (9, -1) + chunk.seek(20) # (20, 1) + chunk.seek(11) # (11, 0) + chunk.seek(20) # (20, 0) + chunk.seek(9) # (9, -1) + chunk.seek(20) # (20, 1) + chunk.seek(5) # (5, -5) + chunk.seek(20) # (20, 5) + chunk.seek(0) # (0, -10) + chunk.seek(0) # (0, 0) + + self.assertEqual(self.amounts_seen, expected) + + def _assert_out_of_bound_relative_seek(self, chunk, expected): + # clear amounts_seen + self.amounts_seen = [] + self.assertEqual(self.amounts_seen, []) + + # (position, change) + chunk.seek(20, 1) # (20, 10) + chunk.seek(-15, 1) # (5, -5) + chunk.seek(15, 1) # (20, 5) + chunk.seek(-11, 1) # (9, -1) + chunk.seek(11, 1) # (20, 1) + chunk.seek(-9, 1) # (11, 0) + chunk.seek(9, 1) # (20, 0) + chunk.seek(-11, 1) # (9, -1) + chunk.seek(11, 1) # (20, 1) + chunk.seek(-15, 1) # (5, -5) + chunk.seek(15, 1) # (20, 5) + chunk.seek(-20, 1) # (0, -10) + chunk.seek(-1000, 1) # (0, 0) + + self.assertEqual(self.amounts_seen, expected) + + def _assert_out_of_bound_end_seek(self, chunk, expected): + # clear amounts_seen + self.amounts_seen = [] + self.assertEqual(self.amounts_seen, []) + + # (position, change) + chunk.seek(10, 2) # (20, 10) + chunk.seek(-5, 2) # (5, -5) + chunk.seek(10, 2) # (20, 5) + chunk.seek(-1, 2) # (9, -1) + chunk.seek(10, 2) # (20, 1) + chunk.seek(1, 2) # (11, 0) + chunk.seek(10, 2) # (20, 0) + chunk.seek(-1, 2) # (9, -1) + chunk.seek(10, 2) # (20, 1) + chunk.seek(-5, 2) # (5, -5) + chunk.seek(10, 2) # (20, 5) + chunk.seek(-10, 2) # (0, -10) + chunk.seek(-1000, 2) # (0, 0) + + self.assertEqual(self.amounts_seen, expected) + def test_close_callbacks(self): with open(self.filename) as f: chunk = ReadFileChunk(f, chunk_size=1, full_file_size=3,