Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-22908: Add seek and tell functionality to ZipExtFile #4966

Merged
merged 10 commits into from
Jan 30, 2018
6 changes: 3 additions & 3 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,9 @@ ZipFile Objects
With *mode* ``'r'`` the file-like object
(``ZipExtFile``) is read-only and provides the following methods:
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
:meth:`~io.IOBase.readlines`, :meth:`__iter__`,
:meth:`~iterator.__next__`. These objects can operate independently of
the ZipFile.
:meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`,
:meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`.
These objects can operate independently of the ZipFile.

With ``mode='w'``, a writable file handle is returned, which supports the
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
Expand Down
34 changes: 34 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1596,6 +1596,40 @@ def test_open_conflicting_handles(self):
self.assertEqual(zipf.read('baz'), msg3)
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])

def test_seek_tell(self):
    """Check seek()/tell() on a member of an on-disk and an in-memory archive."""
    payload = b"Where's Bruce?"
    bruce_at = payload.find(b"Bruce")

    def walk_member(archive):
        # Write a single-member archive to *archive*, reopen it for
        # reading, and step through the member with seek()/tell().
        with zipfile.ZipFile(archive, "w") as writer:
            writer.writestr("foo.txt", payload)
        with zipfile.ZipFile(archive, "r") as reader, \
                reader.open("foo.txt", "r") as member:
            # Absolute seek to the start of b"Bruce".
            member.seek(bruce_at, os.SEEK_SET)
            self.assertEqual(member.tell(), bruce_at)
            # Relative seek back to the beginning ...
            member.seek(-bruce_at, os.SEEK_CUR)
            self.assertEqual(member.tell(), 0)
            # ... and forward again, then confirm the data read there.
            member.seek(bruce_at, os.SEEK_CUR)
            self.assertEqual(member.tell(), bruce_at)
            self.assertEqual(member.read(5), payload[bruce_at:bruce_at + 5])
            # Seeking to the end reports the full uncompressed length.
            member.seek(0, os.SEEK_END)
            self.assertEqual(member.tell(), len(payload))

    # Check seek on a file
    walk_member(TESTFN)
    # Check seek on memory file
    walk_member(io.BytesIO())

def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
Expand Down
82 changes: 82 additions & 0 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,18 @@ def __init__(self, file, pos, close, lock, writing):
self._close = close
self._lock = lock
self._writing = writing
self.seekable = file.seekable
self.tell = file.tell

def seek(self, offset, whence=0):
    """Reposition this handle within the shared underlying file.

    *offset* and *whence* have the usual file ``seek()`` meaning
    (0 = from start, 1 = from this handle's current position,
    2 = from end).  The resulting absolute position is stored in
    ``self._pos`` and returned.

    Raises ValueError if the writing callback stored by the constructor
    reports that a writing handle is open.
    """
    with self._lock:
        # The constructor stores the callback as ``_writing``; there is
        # no ``writing`` attribute on this object.
        if self._writing():
            raise ValueError("Can't reposition in the ZIP file while "
                             "there is an open writing handle on it. "
                             "Close the writing handle before trying to read.")
        if whence == 1:
            # The underlying file is shared, so its real position may
            # have been moved by someone else; a relative seek must be
            # resolved against the position tracked for this handle.
            self._file.seek(self._pos + offset)
        else:
            self._file.seek(offset, whence)
        self._pos = self._file.tell()
        return self._pos

def read(self, n=-1):
with self._lock:
Expand Down Expand Up @@ -740,6 +752,9 @@ class ZipExtFile(io.BufferedIOBase):
# Read from compressed files in 4k blocks.
MIN_READ_SIZE = 4096

# Chunk size to read during seek
MAX_SEEK_READ = 1 << 24

def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj = fileobj
Expand Down Expand Up @@ -772,6 +787,17 @@ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
else:
self._expected_crc = None

self._seekable = False
try:
if fileobj.seekable():
self._orig_compress_start = fileobj.tell()
self._orig_compress_size = zipinfo.compress_size
self._orig_file_size = zipinfo.file_size
self._orig_start_crc = self._running_crc
self._seekable = True
except AttributeError:
pass

def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
self.__class__.__qualname__)]
Expand Down Expand Up @@ -957,6 +983,62 @@ def close(self):
finally:
super().close()

def seekable(self):
    """Return the stored flag that gates seek() and tell() on this object."""
    return self._seekable

def seek(self, offset, whence=0):
    """Move to a new position in the uncompressed data and return it.

    *whence* selects the reference point for *offset*: 0 for the start
    of the data, 1 for the current position, 2 for the end.  The target
    is clamped to the range ``0 .. self._orig_file_size``.

    A target that lies inside the current read buffer only moves the
    buffer index.  A target before the buffer resets the reader to the
    start of the member and reads forward again; a target after it is
    reached by reading and discarding data.

    Raises io.UnsupportedOperation if the object is not seekable, and
    ValueError if *whence* is not 0, 1 or 2.
    """
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    curr_pos = self.tell()
    if whence == 0:  # Seek from start of file
        new_pos = offset
    elif whence == 1:  # Seek from current position
        new_pos = curr_pos + offset
    elif whence == 2:  # Seek from EOF
        new_pos = self._orig_file_size + offset
    else:
        raise ValueError("whence must be os.SEEK_SET (0), "
                         "os.SEEK_CUR (1), or os.SEEK_END (2)")

    # Clamp the target to the bounds of the uncompressed data.
    new_pos = max(0, min(new_pos, self._orig_file_size))

    read_offset = new_pos - curr_pos
    buff_offset = read_offset + self._offset

    if 0 <= buff_offset < len(self._readbuffer):
        # Just move the _offset index if the new position is in the
        # _readbuffer
        self._offset = buff_offset
        read_offset = 0
    elif read_offset < 0:
        # Position is before the current position. Reset the ZipExtFile
        # to the state saved at construction time and read forward
        # from the start of the member.
        self._fileobj.seek(self._orig_compress_start)
        self._running_crc = self._orig_start_crc
        self._compress_left = self._orig_compress_size
        self._left = self._orig_file_size
        self._readbuffer = b''
        self._offset = 0
        # Use the module-level helper directly: this code lives inside
        # the zipfile module, so the name ``zipfile`` is not available.
        self._decompressor = _get_decompressor(self._compress_type)
        self._eof = False
        read_offset = new_pos

    # Read and discard in bounded chunks so that a long forward seek
    # never asks read() for more than MAX_SEEK_READ bytes at once.
    while read_offset > 0:
        read_len = min(self.MAX_SEEK_READ, read_offset)
        self.read(read_len)
        read_offset -= read_len

    return self.tell()

def tell(self):
    """Return the current position within the uncompressed data.

    Raises io.UnsupportedOperation if the object is not seekable.
    """
    if self._seekable:
        # Total size minus the ``_left`` counter, corrected for the
        # part of the read buffer that has not been consumed yet.
        produced = self._orig_file_size - self._left
        unread_buffered = len(self._readbuffer) - self._offset
        return produced - unread_buffered
    raise io.UnsupportedOperation("underlying stream is not seekable")


class _ZipWriteFile(io.BufferedIOBase):
def __init__(self, zf, zinfo, zip64):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added seek and tell to the ZipExtFile class. These methods only work if the
file object used to open the ZIP file is seekable.