Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/bugfix-ReadFileChunk-16290.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "bugfix",
"category": "ReadFileChunk",
"description": "Fix seek behavior in ReadFileChunk class"
}
4 changes: 2 additions & 2 deletions s3transfer/bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ def signal_not_transferring(self):
"""Signal that data being read is not being transferred to S3"""
self.disable_bandwidth_limiting()

def seek(self, where):
self._fileobj.seek(where)
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)

def tell(self):
return self._fileobj.tell()
Expand Down
4 changes: 2 additions & 2 deletions s3transfer/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def read(self, amount=None):
raise self._transfer_coordinator.exception
return self._fileobj.read(amount)

def seek(self, where):
self._fileobj.seek(where)
def seek(self, where, whence=0):
self._fileobj.seek(where, whence)

def tell(self):
return self._fileobj.tell()
Expand Down
35 changes: 27 additions & 8 deletions s3transfer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,9 @@ def write(self, data):
self._open_if_needed()
self._fileobj.write(data)

def seek(self, where):
def seek(self, where, whence=0):
self._open_if_needed()
self._fileobj.seek(where)
self._fileobj.seek(where, whence)

def tell(self):
if self._fileobj is None:
Expand Down Expand Up @@ -425,6 +425,8 @@ def __init__(self, fileobj, chunk_size, full_file_size,
self._size = self._calculate_file_size(
self._fileobj, requested_size=chunk_size,
start_byte=self._start_byte, actual_file_size=full_file_size)
# _amount_read represents the position in the chunk and may exceed
# the chunk size, but won't allow reads out of bounds.
self._amount_read = 0
self._callbacks = callbacks
if callbacks is None:
Expand Down Expand Up @@ -473,10 +475,11 @@ def _calculate_file_size(self, fileobj, requested_size, start_byte,
return min(max_chunk_size, requested_size)

def read(self, amount=None):
amount_left = max(self._size - self._amount_read, 0)
if amount is None:
amount_to_read = self._size - self._amount_read
amount_to_read = amount_left
else:
amount_to_read = min(self._size - self._amount_read, amount)
amount_to_read = min(amount_left, amount)
data = self._fileobj.read(amount_to_read)
self._amount_read += len(data)
if self._callbacks is not None and self._callbacks_enabled:
Expand All @@ -499,13 +502,29 @@ def enable_callback(self):
def disable_callback(self):
self._callbacks_enabled = False

def seek(self, where):
self._fileobj.seek(self._start_byte + where)
def seek(self, where, whence=0):
if whence not in (0, 1, 2):
# Mimic io's error for invalid whence values
raise ValueError(
"invalid whence (%s, should be 0, 1 or 2)" % whence)

# Recalculate where based on chunk attributes so seek from file
# start (whence=0) is always used
where += self._start_byte
if whence == 1:
where += self._amount_read
elif whence == 2:
where += self._size

self._fileobj.seek(max(where, self._start_byte))
if self._callbacks is not None and self._callbacks_enabled:
# To also rewind the callback() for an accurate progress report
bounded_where = max(min(where - self._start_byte, self._size), 0)
bounded_amount_read = min(self._amount_read, self._size)
amount = bounded_where - bounded_amount_read
invoke_progress_callbacks(
self._callbacks, bytes_transferred=where - self._amount_read)
self._amount_read = where
self._callbacks, bytes_transferred=amount)
self._amount_read = max(where - self._start_byte, 0)

def close(self):
if self._close_callbacks is not None and self._callbacks_enabled:
Expand Down
4 changes: 2 additions & 2 deletions tests/functional/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from io import RawIOBase
from io import BytesIO
from botocore.awsrequest import create_request_object
import mock

Expand All @@ -27,7 +27,7 @@ class ArbitraryException(Exception):
pass


class SignalTransferringBody(RawIOBase):
class SignalTransferringBody(BytesIO):
"""A mocked body with the ability to signal when transfers occur"""
def __init__(self):
super(SignalTransferringBody, self).__init__()
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_seek(self):
stream.seek(1)
self.assertEqual(
mock_fileobj.seek.call_args_list,
[mock.call(1)]
[mock.call(1, 0)]
)

def test_tell(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self):
self._pos = 0
self.writes = []

def seek(self, pos):
def seek(self, pos, whence=0):
self._pos = pos

def write(self, data):
Expand Down
199 changes: 199 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,120 @@ def test_tell_and_seek(self):
self.assertEqual(chunk.tell(), 3)
chunk.seek(0)
self.assertEqual(chunk.tell(), 0)
chunk.seek(1, whence=1)
self.assertEqual(chunk.tell(), 1)
chunk.seek(-1, whence=1)
self.assertEqual(chunk.tell(), 0)
chunk.seek(-1, whence=2)
self.assertEqual(chunk.tell(), 2)

def test_tell_and_seek_boundaries(self):
# Test to ensure ReadFileChunk behaves the same as the
# Python standard library around seeking and reading out
# of bounds in a file object.
data = b'abcdefghij12345678klmnopqrst'
start_pos = 10
chunk_size = 8

# Create test file
filename = os.path.join(self.tempdir, 'foo')
with open(filename, 'wb') as f:
f.write(data)

# ReadFileChunk should be a substring of only numbers
file_objects = [
ReadFileChunk.from_filename(
filename, start_byte=start_pos, chunk_size=chunk_size
)]

# Uncomment next line to validate we match Python's io.BytesIO
# file_objects.append(io.BytesIO(data[start_pos:start_pos+chunk_size]))

for obj in file_objects:
self._assert_whence_start_behavior(obj)
self._assert_whence_end_behavior(obj)
self._assert_whence_relative_behavior(obj)
self._assert_boundary_behavior(obj)

def _assert_whence_start_behavior(self, file_obj):
self.assertEqual(file_obj.tell(), 0)

file_obj.seek(1, 0)
self.assertEqual(file_obj.tell(), 1)

file_obj.seek(1)
self.assertEqual(file_obj.tell(), 1)
self.assertEqual(file_obj.read(), b'2345678')

file_obj.seek(3, 0)
self.assertEqual(file_obj.tell(), 3)

file_obj.seek(0, 0)
self.assertEqual(file_obj.tell(), 0)

def _assert_whence_relative_behavior(self, file_obj):
self.assertEqual(file_obj.tell(), 0)

file_obj.seek(2, 1)
self.assertEqual(file_obj.tell(), 2)

file_obj.seek(1, 1)
self.assertEqual(file_obj.tell(), 3)
self.assertEqual(file_obj.read(), b'45678')

file_obj.seek(20, 1)
self.assertEqual(file_obj.tell(), 28)

file_obj.seek(-30, 1)
self.assertEqual(file_obj.tell(), 0)
self.assertEqual(file_obj.read(), b'12345678')

file_obj.seek(-8, 1)
self.assertEqual(file_obj.tell(), 0)

def _assert_whence_end_behavior(self, file_obj):
self.assertEqual(file_obj.tell(), 0)

file_obj.seek(-1, 2)
self.assertEqual(file_obj.tell(), 7)

file_obj.seek(1, 2)
self.assertEqual(file_obj.tell(), 9)

file_obj.seek(3, 2)
self.assertEqual(file_obj.tell(), 11)
self.assertEqual(file_obj.read(), b'')

file_obj.seek(-15, 2)
self.assertEqual(file_obj.tell(), 0)
self.assertEqual(file_obj.read(), b'12345678')

file_obj.seek(-8, 2)
self.assertEqual(file_obj.tell(), 0)

def _assert_boundary_behavior(self, file_obj):
# Verify we're at the start
self.assertEqual(file_obj.tell(), 0)

# Verify we can't move backwards beyond start of file
file_obj.seek(-10, 1)
self.assertEqual(file_obj.tell(), 0)

# Verify we *can* move after end of file, but return nothing
file_obj.seek(10, 2)
self.assertEqual(file_obj.tell(), 18)
self.assertEqual(file_obj.read(), b'')
self.assertEqual(file_obj.read(10), b'')

# Verify we can partially rewind
file_obj.seek(-12, 1)
self.assertEqual(file_obj.tell(), 6)
self.assertEqual(file_obj.read(), b'78')
self.assertEqual(file_obj.tell(), 8)

# Verify we can rewind to start
file_obj.seek(0)
self.assertEqual(file_obj.tell(), 0)

def test_file_chunk_supports_context_manager(self):
filename = os.path.join(self.tempdir, 'foo')
Expand Down Expand Up @@ -554,6 +668,91 @@ def test_callback_will_also_be_triggered_by_seek(self):
chunk.read(2)
self.assertEqual(self.amounts_seen, [2, -2, 2, -1, 2])

def test_callback_triggered_by_out_of_bound_seeks(self):
data = b'abcdefghij1234567890klmnopqr'

# Create test file
filename = os.path.join(self.tempdir, 'foo')
with open(filename, 'wb') as f:
f.write(data)
chunk = ReadFileChunk.from_filename(
filename, start_byte=10, chunk_size=10,
callbacks=[self.callback])

# Seek calls that generate "0" progress are skipped by
# invoke_progress_callbacks and won't appear in the list.
expected_callback_prog = [10, -5, 5, -1, 1, -1, 1, -5, 5, -10]

self._assert_out_of_bound_start_seek(chunk, expected_callback_prog)
self._assert_out_of_bound_relative_seek(chunk, expected_callback_prog)
self._assert_out_of_bound_end_seek(chunk, expected_callback_prog)

def _assert_out_of_bound_start_seek(self, chunk, expected):
# clear amounts_seen
self.amounts_seen = []
self.assertEqual(self.amounts_seen, [])

# (position, change)
chunk.seek(20) # (20, 10)
chunk.seek(5) # (5, -5)
chunk.seek(20) # (20, 5)
chunk.seek(9) # (9, -1)
chunk.seek(20) # (20, 1)
chunk.seek(11) # (11, 0)
chunk.seek(20) # (20, 0)
chunk.seek(9) # (9, -1)
chunk.seek(20) # (20, 1)
chunk.seek(5) # (5, -5)
chunk.seek(20) # (20, 5)
chunk.seek(0) # (0, -10)
chunk.seek(0) # (0, 0)

self.assertEqual(self.amounts_seen, expected)

def _assert_out_of_bound_relative_seek(self, chunk, expected):
# clear amounts_seen
self.amounts_seen = []
self.assertEqual(self.amounts_seen, [])

# (position, change)
chunk.seek(20, 1) # (20, 10)
chunk.seek(-15, 1) # (5, -5)
chunk.seek(15, 1) # (20, 5)
chunk.seek(-11, 1) # (9, -1)
chunk.seek(11, 1) # (20, 1)
chunk.seek(-9, 1) # (11, 0)
chunk.seek(9, 1) # (20, 0)
chunk.seek(-11, 1) # (9, -1)
chunk.seek(11, 1) # (20, 1)
chunk.seek(-15, 1) # (5, -5)
chunk.seek(15, 1) # (20, 5)
chunk.seek(-20, 1) # (0, -10)
chunk.seek(-1000, 1) # (0, 0)

self.assertEqual(self.amounts_seen, expected)

def _assert_out_of_bound_end_seek(self, chunk, expected):
# clear amounts_seen
self.amounts_seen = []
self.assertEqual(self.amounts_seen, [])

# (position, change)
chunk.seek(10, 2) # (20, 10)
chunk.seek(-5, 2) # (5, -5)
chunk.seek(10, 2) # (20, 5)
chunk.seek(-1, 2) # (9, -1)
chunk.seek(10, 2) # (20, 1)
chunk.seek(1, 2) # (11, 0)
chunk.seek(10, 2) # (20, 0)
chunk.seek(-1, 2) # (9, -1)
chunk.seek(10, 2) # (20, 1)
chunk.seek(-5, 2) # (5, -5)
chunk.seek(10, 2) # (20, 5)
chunk.seek(-10, 2) # (0, -10)
chunk.seek(-1000, 2) # (0, 0)

self.assertEqual(self.amounts_seen, expected)

def test_close_callbacks(self):
with open(self.filename) as f:
chunk = ReadFileChunk(f, chunk_size=1, full_file_size=3,
Expand Down