Skip to content

Commit

Permalink
bpo-22908: Add seek and tell functionality to ZipExtFile (GH-4966)
Browse files Browse the repository at this point in the history
This allows for nested zip files, tar files within zip files, zip files within tar files, etc.

Contributed by: John Jolly
  • Loading branch information
jjolly authored and gpshead committed Jan 30, 2018
1 parent 2e0ecde commit 066df4f
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 3 deletions.
6 changes: 3 additions & 3 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,9 @@ ZipFile Objects
With *mode* ``'r'`` the file-like object
(``ZipExtFile``) is read-only and provides the following methods:
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
:meth:`~io.IOBase.readlines`, :meth:`__iter__`,
:meth:`~iterator.__next__`. These objects can operate independently of
the ZipFile.
:meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`,
:meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`.
These objects can operate independently of the ZipFile.

With ``mode='w'``, a writable file handle is returned, which supports the
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
Expand Down
34 changes: 34 additions & 0 deletions Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,40 @@ def test_open_conflicting_handles(self):
self.assertEqual(zipf.read('baz'), msg3)
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])

def test_seek_tell(self):
    """Check ZipExtFile.seek()/tell() on a disk archive and an in-memory one."""
    payload = b"Where's Bruce?"
    bruce_at = payload.find(b"Bruce")

    def check_archive(archive):
        # Store a single member, then reopen the archive for reading and
        # walk the member with every supported ``whence`` value.
        with zipfile.ZipFile(archive, "w") as zipf:
            zipf.writestr("foo.txt", payload)
        with zipfile.ZipFile(archive, "r") as zipf:
            with zipf.open("foo.txt", "r") as fp:
                # Absolute seek forward.
                fp.seek(bruce_at, os.SEEK_SET)
                self.assertEqual(fp.tell(), bruce_at)
                # Relative seek back to the start.
                fp.seek(-bruce_at, os.SEEK_CUR)
                self.assertEqual(fp.tell(), 0)
                # Relative seek forward again, then read from there.
                fp.seek(bruce_at, os.SEEK_CUR)
                self.assertEqual(fp.tell(), bruce_at)
                self.assertEqual(fp.read(5), payload[bruce_at:bruce_at+5])
                # Seek to the very end of the member.
                fp.seek(0, os.SEEK_END)
                self.assertEqual(fp.tell(), len(payload))

    # Check seek on a file
    check_archive(TESTFN)
    # Check seek on memory file
    check_archive(io.BytesIO())

def tearDown(self):
unlink(TESTFN)
unlink(TESTFN2)
Expand Down
82 changes: 82 additions & 0 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,18 @@ def __init__(self, file, pos, close, lock, writing):
self._close = close
self._lock = lock
self._writing = writing
self.seekable = file.seekable
self.tell = file.tell

def seek(self, offset, whence=0):
    """Reposition this handle within the shared archive file.

    *offset* and *whence* have the same meaning as for io.IOBase.seek().
    The resulting absolute position is remembered in ``self._pos`` and
    returned.

    Raises ValueError while a writing handle is open on the archive.
    """
    with self._lock:
        # __init__ stores the "is a writer active?" callable as ``_writing``.
        if self._writing():
            raise ValueError("Can't reposition in the ZIP file while "
                    "there is an open writing handle on it. "
                    "Close the writing handle before trying to read.")
        if whence == 1:  # Seek from current position
            # The underlying file object is shared, so its own position may
            # have been moved by another handle; "current" has to mean the
            # position saved for this handle.
            self._file.seek(self._pos + offset)
        else:
            self._file.seek(offset, whence)
        self._pos = self._file.tell()
        return self._pos

def read(self, n=-1):
with self._lock:
Expand Down Expand Up @@ -746,6 +758,9 @@ class ZipExtFile(io.BufferedIOBase):
# Read from compressed files in 4k blocks.
MIN_READ_SIZE = 4096

# Chunk size to read during seek
MAX_SEEK_READ = 1 << 24

def __init__(self, fileobj, mode, zipinfo, decrypter=None,
close_fileobj=False):
self._fileobj = fileobj
Expand Down Expand Up @@ -778,6 +793,17 @@ def __init__(self, fileobj, mode, zipinfo, decrypter=None,
else:
self._expected_crc = None

self._seekable = False
try:
if fileobj.seekable():
self._orig_compress_start = fileobj.tell()
self._orig_compress_size = zipinfo.compress_size
self._orig_file_size = zipinfo.file_size
self._orig_start_crc = self._running_crc
self._seekable = True
except AttributeError:
pass

def __repr__(self):
result = ['<%s.%s' % (self.__class__.__module__,
self.__class__.__qualname__)]
Expand Down Expand Up @@ -963,6 +989,62 @@ def close(self):
finally:
super().close()

def seekable(self):
    """Return True if seek() and tell() can be used on this member.

    The flag is set once in __init__, based on whether the underlying
    file object reported itself as seekable.
    """
    return self._seekable

def seek(self, offset, whence=0):
    """Move the read position within the uncompressed member data.

    *whence* is 0 (from the start), 1 (from the current position) or
    2 (from the end).  The target is clamped to the range
    ``0 .. uncompressed size``.  Returns the new position.

    Seeking inside the current read buffer only moves the buffer index.
    Seeking forward past the buffer reads and discards data.  Seeking
    backward past the buffer restarts decompression from the beginning
    of the member and then reads forward to the target.

    Raises io.UnsupportedOperation if the underlying stream is not
    seekable, and ValueError for an unknown *whence*.
    """
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    curr_pos = self.tell()
    if whence == 0:  # Seek from start of file
        new_pos = offset
    elif whence == 1:  # Seek from current position
        new_pos = curr_pos + offset
    elif whence == 2:  # Seek from EOF
        new_pos = self._orig_file_size + offset
    else:
        raise ValueError("whence must be os.SEEK_SET (0), "
                         "os.SEEK_CUR (1), or os.SEEK_END (2)")

    # Clamp the target to the bounds of the uncompressed data.
    new_pos = max(0, min(new_pos, self._orig_file_size))

    read_offset = new_pos - curr_pos
    buff_offset = read_offset + self._offset

    if 0 <= buff_offset < len(self._readbuffer):
        # Just move the _offset index if the new position is in the _readbuffer
        self._offset = buff_offset
        read_offset = 0
    elif read_offset < 0:
        # Position is before the current position. Reset the ZipExtFile
        # to the state recorded when the member was opened.
        # NOTE(review): any decrypter state is not rewound here -- confirm
        # whether encrypted members need extra handling.
        self._fileobj.seek(self._orig_compress_start)
        self._running_crc = self._orig_start_crc
        self._compress_left = self._orig_compress_size
        self._left = self._orig_file_size
        self._readbuffer = b''
        self._offset = 0
        # _get_decompressor is a module-level helper of this module, so it
        # must not be looked up through a ``zipfile.`` prefix here.
        self._decompressor = _get_decompressor(self._compress_type)
        self._eof = False
        read_offset = new_pos

    # Skip forward by reading and discarding, in bounded chunks so a long
    # seek does not materialise the whole distance in memory at once.
    while read_offset > 0:
        read_len = min(self.MAX_SEEK_READ, read_offset)
        self.read(read_len)
        read_offset -= read_len

    return self.tell()

def tell(self):
    """Return the current position within the uncompressed member data.

    Raises io.UnsupportedOperation if the underlying stream is not
    seekable.
    """
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    # Bytes sitting in the read buffer that the caller has not consumed yet.
    unread_in_buffer = len(self._readbuffer) - self._offset
    # _left counts uncompressed bytes not yet produced; whatever has been
    # produced but is still buffered has not been "read" by the caller.
    return self._orig_file_size - self._left - unread_in_buffer


class _ZipWriteFile(io.BufferedIOBase):
def __init__(self, zf, zinfo, zip64):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Added seek and tell to the ZipExtFile class. This only works if the file
object used to open the zipfile is seekable.

0 comments on commit 066df4f

Please sign in to comment.