Skip to content

Commit 798bcaa

Browse files
pR0Psgpshead
andauthored
pythongh-103861: Fix Zip64 extensions not being properly applied in some cases (python#103863)
Fix Zip64 extensions not being properly applied in some cases: Fixes an issue where adding a small file to a `ZipFile` object while forcing zip64 extensions causes an extra Zip64 record to be added to the zip, but doesn't update the `min_version` or file sizes in the primary central directory header. Also fixed an edge case in checking if zip64 extensions are required: This fixes an issue where if data requiring zip64 extensions was added to an unseekable stream without specifying `force_zip64=True`, zip64 extensions would not be used and a RuntimeError would not be raised when closing the file (even though the size would be known at that point). This would result in successfully writing corrupt zip files. Deciding if zip64 extensions are required outside of the `FileHeader` function means that both `FileHeader` and `_ZipWriteFile` will always be in sync. Previously, the `FileHeader` function could enable zip64 extensions without propagating that decision to the `_ZipWriteFile` class, which would then not correctly write the data descriptor record or check for errors on close. If anyone is actually using `ZipInfo.FileHeader` as a public API without explicitly passing True or False in for zip64, their own code may still be susceptible to that kind of bug unless they make a similar change to where the zip64 decision happens. Fixes python#103861 --------- Co-authored-by: Gregory P. Smith <[email protected]>
1 parent 85ec192 commit 798bcaa

File tree

3 files changed

+172
-15
lines changed

3 files changed

+172
-15
lines changed

Lib/test/test_zipfile/test_core.py

+153
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,159 @@ def test_generated_valid_zip64_extra(self):
10801080
self.assertEqual(zinfo.header_offset, expected_header_offset)
10811081
self.assertEqual(zf.read(zinfo), expected_content)
10821082

1083+
def test_force_zip64(self):
1084+
"""Test that forcing zip64 extensions correctly notes this in the zip file"""
1085+
1086+
# GH-103861 describes an issue where forcing a small file to use zip64
1087+
# extensions would add a zip64 extra record, but not change the data
1088+
# sizes to 0xFFFFFFFF to indicate to the extractor that the zip64
1089+
# record should be read. Additionally, it would not set the required
1090+
# version to indicate that zip64 extensions are required to extract it.
1091+
# This test replicates the situation and reads the raw data to specifically ensure:
1092+
# - The required extract version is always >= ZIP64_VERSION
1093+
# - The compressed and uncompressed size in the file headers are both
1094+
# 0xFFFFFFFF (ie. point to zip64 record)
1095+
# - The zip64 record is provided and has the correct sizes in it
1096+
# Other aspects of the zip are checked as well, but verifying the above is the main goal.
1097+
# Because this is hard to verify by parsing the data as a zip, the raw
1098+
# bytes are checked to ensure that they line up with the zip spec.
1099+
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
1100+
# The relevent sections for this test are:
1101+
# - 4.3.7 for local file header
1102+
# - 4.5.3 for zip64 extra field
1103+
1104+
data = io.BytesIO()
1105+
with zipfile.ZipFile(data, mode="w", allowZip64=True) as zf:
1106+
with zf.open("text.txt", mode="w", force_zip64=True) as zi:
1107+
zi.write(b"_")
1108+
1109+
zipdata = data.getvalue()
1110+
1111+
# pull out and check zip information
1112+
(
1113+
header, vers, os, flags, comp, csize, usize, fn_len,
1114+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, cd_sig
1115+
) = struct.unpack("<4sBBHH8xIIHH8shhQQx4s", zipdata[:63])
1116+
1117+
self.assertEqual(header, b"PK\x03\x04") # local file header
1118+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1119+
self.assertEqual(os, 0) # compatible with MS-DOS
1120+
self.assertEqual(flags, 0) # no flags
1121+
self.assertEqual(comp, 0) # compression method = stored
1122+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1123+
self.assertEqual(usize, 0xFFFFFFFF)
1124+
self.assertEqual(fn_len, 8) # filename len
1125+
self.assertEqual(ex_total_len, 20) # size of extra records
1126+
self.assertEqual(ex_id, 1) # Zip64 extra record
1127+
self.assertEqual(ex_len, 16) # 16 bytes of data
1128+
self.assertEqual(ex_usize, 1) # uncompressed size
1129+
self.assertEqual(ex_csize, 1) # compressed size
1130+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1131+
1132+
z = zipfile.ZipFile(io.BytesIO(zipdata))
1133+
zinfos = z.infolist()
1134+
self.assertEqual(len(zinfos), 1)
1135+
self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract
1136+
1137+
def test_unseekable_zip_unknown_filesize(self):
1138+
"""Test that creating a zip with/without seeking will raise a RuntimeError if zip64 was required but not used"""
1139+
1140+
def make_zip(fp):
1141+
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
1142+
with zf.open("text.txt", mode="w", force_zip64=False) as zi:
1143+
zi.write(b"_" * (zipfile.ZIP64_LIMIT + 1))
1144+
1145+
self.assertRaises(RuntimeError, make_zip, io.BytesIO())
1146+
self.assertRaises(RuntimeError, make_zip, Unseekable(io.BytesIO()))
1147+
1148+
def test_zip64_required_not_allowed_fail(self):
1149+
"""Test that trying to add a large file to a zip that doesn't allow zip64 extensions fails on add"""
1150+
def make_zip(fp):
1151+
with zipfile.ZipFile(fp, mode="w", allowZip64=False) as zf:
1152+
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
1153+
info = zipfile.ZipInfo("text.txt")
1154+
info.file_size = zipfile.ZIP64_LIMIT + 1
1155+
zf.open(info, mode="w")
1156+
1157+
self.assertRaises(zipfile.LargeZipFile, make_zip, io.BytesIO())
1158+
self.assertRaises(zipfile.LargeZipFile, make_zip, Unseekable(io.BytesIO()))
1159+
1160+
def test_unseekable_zip_known_filesize(self):
1161+
"""Test that creating a zip without seeking will use zip64 extensions if the file size is provided up-front"""
1162+
1163+
# This test ensures that the zip will use a zip64 data descriptor (same
1164+
# as a regular data descriptor except the sizes are 8 bytes instead of
1165+
# 4) record to communicate the size of a file if the zip is being
1166+
# written to an unseekable stream.
1167+
# Because this sort of thing is hard to verify by parsing the data back
1168+
# in as a zip, this test looks at the raw bytes created to ensure that
1169+
# the correct data has been generated.
1170+
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
1171+
# The relevent sections for this test are:
1172+
# - 4.3.7 for local file header
1173+
# - 4.3.9 for the data descriptor
1174+
# - 4.5.3 for zip64 extra field
1175+
1176+
file_size = zipfile.ZIP64_LIMIT + 1
1177+
1178+
def make_zip(fp):
1179+
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
1180+
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
1181+
info = zipfile.ZipInfo("text.txt")
1182+
info.file_size = file_size
1183+
with zf.open(info, mode="w", force_zip64=False) as zi:
1184+
zi.write(b"_" * file_size)
1185+
return fp
1186+
1187+
# check seekable file information
1188+
seekable_data = make_zip(io.BytesIO()).getvalue()
1189+
(
1190+
header, vers, os, flags, comp, csize, usize, fn_len,
1191+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
1192+
cd_sig
1193+
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data[:62 + file_size])
1194+
1195+
self.assertEqual(header, b"PK\x03\x04") # local file header
1196+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1197+
self.assertEqual(os, 0) # compatible with MS-DOS
1198+
self.assertEqual(flags, 0) # no flags set
1199+
self.assertEqual(comp, 0) # compression method = stored
1200+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1201+
self.assertEqual(usize, 0xFFFFFFFF)
1202+
self.assertEqual(fn_len, 8) # filename len
1203+
self.assertEqual(ex_total_len, 20) # size of extra records
1204+
self.assertEqual(ex_id, 1) # Zip64 extra record
1205+
self.assertEqual(ex_len, 16) # 16 bytes of data
1206+
self.assertEqual(ex_usize, file_size) # uncompressed size
1207+
self.assertEqual(ex_csize, file_size) # compressed size
1208+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1209+
1210+
# check unseekable file information
1211+
unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue()
1212+
(
1213+
header, vers, os, flags, comp, csize, usize, fn_len,
1214+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
1215+
dd_header, dd_usize, dd_csize, cd_sig
1216+
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data[:86 + file_size])
1217+
1218+
self.assertEqual(header, b"PK\x03\x04") # local file header
1219+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1220+
self.assertEqual(os, 0) # compatible with MS-DOS
1221+
self.assertEqual("{:b}".format(flags), "1000") # streaming flag set
1222+
self.assertEqual(comp, 0) # compression method = stored
1223+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1224+
self.assertEqual(usize, 0xFFFFFFFF)
1225+
self.assertEqual(fn_len, 8) # filename len
1226+
self.assertEqual(ex_total_len, 20) # size of extra records
1227+
self.assertEqual(ex_id, 1) # Zip64 extra record
1228+
self.assertEqual(ex_len, 16) # 16 bytes of data
1229+
self.assertEqual(ex_usize, 0) # uncompressed size - 0 to defer to data descriptor
1230+
self.assertEqual(ex_csize, 0) # compressed size - 0 to defer to data descriptor
1231+
self.assertEqual(dd_header, b"PK\07\x08") # data descriptor
1232+
self.assertEqual(dd_usize, file_size) # file size (8 bytes because zip64)
1233+
self.assertEqual(dd_csize, file_size) # compressed size (8 bytes because zip64)
1234+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1235+
10831236

10841237
@requires_zlib()
10851238
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,

Lib/zipfile/__init__.py

+17-15
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,12 @@ def __repr__(self):
442442
return ''.join(result)
443443

444444
def FileHeader(self, zip64=None):
445-
"""Return the per-file header as a bytes object."""
445+
"""Return the per-file header as a bytes object.
446+
447+
When the optional zip64 arg is None rather than a bool, we will
448+
decide based upon the file_size and compress_size, if known,
449+
False otherwise.
450+
"""
446451
dt = self.date_time
447452
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
448453
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
@@ -458,16 +463,13 @@ def FileHeader(self, zip64=None):
458463

459464
min_version = 0
460465
if zip64 is None:
466+
# We always explicitly pass zip64 within this module.... This
467+
# remains for anyone using ZipInfo.FileHeader as a public API.
461468
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
462469
if zip64:
463470
fmt = '<HHQQ'
464471
extra = extra + struct.pack(fmt,
465472
1, struct.calcsize(fmt)-4, file_size, compress_size)
466-
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
467-
if not zip64:
468-
raise LargeZipFile("Filesize would require ZIP64 extensions")
469-
# File is larger than what fits into a 4 byte integer,
470-
# fall back to the ZIP64 extension
471473
file_size = 0xffffffff
472474
compress_size = 0xffffffff
473475
min_version = ZIP64_VERSION
@@ -1219,6 +1221,12 @@ def close(self):
12191221
self._zinfo.CRC = self._crc
12201222
self._zinfo.file_size = self._file_size
12211223

1224+
if not self._zip64:
1225+
if self._file_size > ZIP64_LIMIT:
1226+
raise RuntimeError("File size too large, try using force_zip64")
1227+
if self._compress_size > ZIP64_LIMIT:
1228+
raise RuntimeError("Compressed size too large, try using force_zip64")
1229+
12221230
# Write updated header info
12231231
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
12241232
# Write CRC and file sizes after the file data
@@ -1227,13 +1235,6 @@ def close(self):
12271235
self._zinfo.compress_size, self._zinfo.file_size))
12281236
self._zipfile.start_dir = self._fileobj.tell()
12291237
else:
1230-
if not self._zip64:
1231-
if self._file_size > ZIP64_LIMIT:
1232-
raise RuntimeError(
1233-
'File size too large, try using force_zip64')
1234-
if self._compress_size > ZIP64_LIMIT:
1235-
raise RuntimeError(
1236-
'Compressed size too large, try using force_zip64')
12371238
# Seek backwards and write file header (which will now include
12381239
# correct CRC and file sizes)
12391240

@@ -1672,8 +1673,9 @@ def _open_to_write(self, zinfo, force_zip64=False):
16721673
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
16731674

16741675
# Compressed size can be larger than uncompressed size
1675-
zip64 = self._allowZip64 and \
1676-
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1676+
zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1677+
if not self._allowZip64 and zip64:
1678+
raise LargeZipFile("Filesize would require ZIP64 extensions")
16771679

16781680
if self._seekable:
16791681
self.fp.seek(self.start_dir)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix ``zipfile.Zipfile`` creating invalid zip files when ``force_zip64`` was
2+
used to add files to them. Patch by Carey Metcalfe.

0 commit comments

Comments
 (0)