Skip to content

Commit f09ad48

Browse files
pR0Psgpshead
authored andcommitted
[3.11] pythongh-103861: Fix Zip64 extensions not being properly applied in some cases (pythonGH-103863)
Fix Zip64 extensions not being properly applied in some cases: Fixes an issue where adding a small file to a `ZipFile` object while forcing zip64 extensions causes an extra Zip64 record to be added to the zip, but doesn't update the `min_version` or file sizes in the primary central directory header. Also fixed an edge case in checking if zip64 extensions are required: This fixes an issue where if data requiring zip64 extensions was added to an unseekable stream without specifying `force_zip64=True`, zip64 extensions would not be used and a RuntimeError would not be raised when closing the file (even though the size would be known at that point). This would result in successfully writing corrupt zip files. Deciding if zip64 extensions are required outside of the `FileHeader` function means that both `FileHeader` and `_ZipWriteFile` will always be in sync. Previously, the `FileHeader` function could enable zip64 extensions without propagating that decision to the `_ZipWriteFile` class, which would then not correctly write the data descriptor record or check for errors on close. If anyone is actually using `ZipInfo.FileHeader` as a public API without explicitly passing True or False in for zip64, their own code may still be susceptible to that kind of bug unless they make a similar change to where the zip64 decision happens. Fixes pythonGH-103861 --------- Co-authored-by: Gregory P. Smith <[email protected]>. (cherry picked from commit 798bcaa) Co-authored-by: Carey Metcalfe <[email protected]>
1 parent 7779027 commit f09ad48

File tree

3 files changed

+172
-15
lines changed

3 files changed

+172
-15
lines changed

Lib/test/test_zipfile.py

+153
Original file line numberDiff line numberDiff line change
@@ -1082,6 +1082,159 @@ def test_generated_valid_zip64_extra(self):
10821082
self.assertEqual(zinfo.header_offset, expected_header_offset)
10831083
self.assertEqual(zf.read(zinfo), expected_content)
10841084

1085+
def test_force_zip64(self):
1086+
"""Test that forcing zip64 extensions correctly notes this in the zip file"""
1087+
1088+
# GH-103861 describes an issue where forcing a small file to use zip64
1089+
# extensions would add a zip64 extra record, but not change the data
1090+
# sizes to 0xFFFFFFFF to indicate to the extractor that the zip64
1091+
# record should be read. Additionally, it would not set the required
1092+
# version to indicate that zip64 extensions are required to extract it.
1093+
# This test replicates the situation and reads the raw data to specifically ensure:
1094+
# - The required extract version is always >= ZIP64_VERSION
1095+
# - The compressed and uncompressed size in the file headers are both
1096+
# 0xFFFFFFFF (ie. point to zip64 record)
1097+
# - The zip64 record is provided and has the correct sizes in it
1098+
# Other aspects of the zip are checked as well, but verifying the above is the main goal.
1099+
# Because this is hard to verify by parsing the data as a zip, the raw
1100+
# bytes are checked to ensure that they line up with the zip spec.
1101+
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
1102+
# The relevent sections for this test are:
1103+
# - 4.3.7 for local file header
1104+
# - 4.5.3 for zip64 extra field
1105+
1106+
data = io.BytesIO()
1107+
with zipfile.ZipFile(data, mode="w", allowZip64=True) as zf:
1108+
with zf.open("text.txt", mode="w", force_zip64=True) as zi:
1109+
zi.write(b"_")
1110+
1111+
zipdata = data.getvalue()
1112+
1113+
# pull out and check zip information
1114+
(
1115+
header, vers, os, flags, comp, csize, usize, fn_len,
1116+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize, cd_sig
1117+
) = struct.unpack("<4sBBHH8xIIHH8shhQQx4s", zipdata[:63])
1118+
1119+
self.assertEqual(header, b"PK\x03\x04") # local file header
1120+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1121+
self.assertEqual(os, 0) # compatible with MS-DOS
1122+
self.assertEqual(flags, 0) # no flags
1123+
self.assertEqual(comp, 0) # compression method = stored
1124+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1125+
self.assertEqual(usize, 0xFFFFFFFF)
1126+
self.assertEqual(fn_len, 8) # filename len
1127+
self.assertEqual(ex_total_len, 20) # size of extra records
1128+
self.assertEqual(ex_id, 1) # Zip64 extra record
1129+
self.assertEqual(ex_len, 16) # 16 bytes of data
1130+
self.assertEqual(ex_usize, 1) # uncompressed size
1131+
self.assertEqual(ex_csize, 1) # compressed size
1132+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1133+
1134+
z = zipfile.ZipFile(io.BytesIO(zipdata))
1135+
zinfos = z.infolist()
1136+
self.assertEqual(len(zinfos), 1)
1137+
self.assertGreaterEqual(zinfos[0].extract_version, zipfile.ZIP64_VERSION) # requires zip64 to extract
1138+
1139+
def test_unseekable_zip_unknown_filesize(self):
1140+
"""Test that creating a zip with/without seeking will raise a RuntimeError if zip64 was required but not used"""
1141+
1142+
def make_zip(fp):
1143+
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
1144+
with zf.open("text.txt", mode="w", force_zip64=False) as zi:
1145+
zi.write(b"_" * (zipfile.ZIP64_LIMIT + 1))
1146+
1147+
self.assertRaises(RuntimeError, make_zip, io.BytesIO())
1148+
self.assertRaises(RuntimeError, make_zip, Unseekable(io.BytesIO()))
1149+
1150+
def test_zip64_required_not_allowed_fail(self):
1151+
"""Test that trying to add a large file to a zip that doesn't allow zip64 extensions fails on add"""
1152+
def make_zip(fp):
1153+
with zipfile.ZipFile(fp, mode="w", allowZip64=False) as zf:
1154+
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
1155+
info = zipfile.ZipInfo("text.txt")
1156+
info.file_size = zipfile.ZIP64_LIMIT + 1
1157+
zf.open(info, mode="w")
1158+
1159+
self.assertRaises(zipfile.LargeZipFile, make_zip, io.BytesIO())
1160+
self.assertRaises(zipfile.LargeZipFile, make_zip, Unseekable(io.BytesIO()))
1161+
1162+
def test_unseekable_zip_known_filesize(self):
1163+
"""Test that creating a zip without seeking will use zip64 extensions if the file size is provided up-front"""
1164+
1165+
# This test ensures that the zip will use a zip64 data descriptor (same
1166+
# as a regular data descriptor except the sizes are 8 bytes instead of
1167+
# 4) record to communicate the size of a file if the zip is being
1168+
# written to an unseekable stream.
1169+
# Because this sort of thing is hard to verify by parsing the data back
1170+
# in as a zip, this test looks at the raw bytes created to ensure that
1171+
# the correct data has been generated.
1172+
# The spec for this can be found at: https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
1173+
# The relevent sections for this test are:
1174+
# - 4.3.7 for local file header
1175+
# - 4.3.9 for the data descriptor
1176+
# - 4.5.3 for zip64 extra field
1177+
1178+
file_size = zipfile.ZIP64_LIMIT + 1
1179+
1180+
def make_zip(fp):
1181+
with zipfile.ZipFile(fp, mode="w", allowZip64=True) as zf:
1182+
# pretend zipfile.ZipInfo.from_file was used to get the name and filesize
1183+
info = zipfile.ZipInfo("text.txt")
1184+
info.file_size = file_size
1185+
with zf.open(info, mode="w", force_zip64=False) as zi:
1186+
zi.write(b"_" * file_size)
1187+
return fp
1188+
1189+
# check seekable file information
1190+
seekable_data = make_zip(io.BytesIO()).getvalue()
1191+
(
1192+
header, vers, os, flags, comp, csize, usize, fn_len,
1193+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
1194+
cd_sig
1195+
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s".format(file_size), seekable_data[:62 + file_size])
1196+
1197+
self.assertEqual(header, b"PK\x03\x04") # local file header
1198+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1199+
self.assertEqual(os, 0) # compatible with MS-DOS
1200+
self.assertEqual(flags, 0) # no flags set
1201+
self.assertEqual(comp, 0) # compression method = stored
1202+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1203+
self.assertEqual(usize, 0xFFFFFFFF)
1204+
self.assertEqual(fn_len, 8) # filename len
1205+
self.assertEqual(ex_total_len, 20) # size of extra records
1206+
self.assertEqual(ex_id, 1) # Zip64 extra record
1207+
self.assertEqual(ex_len, 16) # 16 bytes of data
1208+
self.assertEqual(ex_usize, file_size) # uncompressed size
1209+
self.assertEqual(ex_csize, file_size) # compressed size
1210+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1211+
1212+
# check unseekable file information
1213+
unseekable_data = make_zip(Unseekable(io.BytesIO())).fp.getvalue()
1214+
(
1215+
header, vers, os, flags, comp, csize, usize, fn_len,
1216+
ex_total_len, filename, ex_id, ex_len, ex_usize, ex_csize,
1217+
dd_header, dd_usize, dd_csize, cd_sig
1218+
) = struct.unpack("<4sBBHH8xIIHH8shhQQ{}x4s4xQQ4s".format(file_size), unseekable_data[:86 + file_size])
1219+
1220+
self.assertEqual(header, b"PK\x03\x04") # local file header
1221+
self.assertGreaterEqual(vers, zipfile.ZIP64_VERSION) # requires zip64 to extract
1222+
self.assertEqual(os, 0) # compatible with MS-DOS
1223+
self.assertEqual("{:b}".format(flags), "1000") # streaming flag set
1224+
self.assertEqual(comp, 0) # compression method = stored
1225+
self.assertEqual(csize, 0xFFFFFFFF) # sizes are in zip64 extra
1226+
self.assertEqual(usize, 0xFFFFFFFF)
1227+
self.assertEqual(fn_len, 8) # filename len
1228+
self.assertEqual(ex_total_len, 20) # size of extra records
1229+
self.assertEqual(ex_id, 1) # Zip64 extra record
1230+
self.assertEqual(ex_len, 16) # 16 bytes of data
1231+
self.assertEqual(ex_usize, 0) # uncompressed size - 0 to defer to data descriptor
1232+
self.assertEqual(ex_csize, 0) # compressed size - 0 to defer to data descriptor
1233+
self.assertEqual(dd_header, b"PK\07\x08") # data descriptor
1234+
self.assertEqual(dd_usize, file_size) # file size (8 bytes because zip64)
1235+
self.assertEqual(dd_csize, file_size) # compressed size (8 bytes because zip64)
1236+
self.assertEqual(cd_sig, b"PK\x01\x02") # ensure the central directory header is next
1237+
10851238

10861239
@requires_zlib()
10871240
class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,

Lib/zipfile.py

+17-15
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,12 @@ def __repr__(self):
435435
return ''.join(result)
436436

437437
def FileHeader(self, zip64=None):
438-
"""Return the per-file header as a bytes object."""
438+
"""Return the per-file header as a bytes object.
439+
440+
When the optional zip64 arg is None rather than a bool, we will
441+
decide based upon the file_size and compress_size, if known,
442+
False otherwise.
443+
"""
439444
dt = self.date_time
440445
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
441446
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
@@ -451,16 +456,13 @@ def FileHeader(self, zip64=None):
451456

452457
min_version = 0
453458
if zip64 is None:
459+
# We always explicitly pass zip64 within this module.... This
460+
# remains for anyone using ZipInfo.FileHeader as a public API.
454461
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
455462
if zip64:
456463
fmt = '<HHQQ'
457464
extra = extra + struct.pack(fmt,
458465
1, struct.calcsize(fmt)-4, file_size, compress_size)
459-
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
460-
if not zip64:
461-
raise LargeZipFile("Filesize would require ZIP64 extensions")
462-
# File is larger than what fits into a 4 byte integer,
463-
# fall back to the ZIP64 extension
464466
file_size = 0xffffffff
465467
compress_size = 0xffffffff
466468
min_version = ZIP64_VERSION
@@ -1183,6 +1185,12 @@ def close(self):
11831185
self._zinfo.CRC = self._crc
11841186
self._zinfo.file_size = self._file_size
11851187

1188+
if not self._zip64:
1189+
if self._file_size > ZIP64_LIMIT:
1190+
raise RuntimeError("File size too large, try using force_zip64")
1191+
if self._compress_size > ZIP64_LIMIT:
1192+
raise RuntimeError("Compressed size too large, try using force_zip64")
1193+
11861194
# Write updated header info
11871195
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
11881196
# Write CRC and file sizes after the file data
@@ -1191,13 +1199,6 @@ def close(self):
11911199
self._zinfo.compress_size, self._zinfo.file_size))
11921200
self._zipfile.start_dir = self._fileobj.tell()
11931201
else:
1194-
if not self._zip64:
1195-
if self._file_size > ZIP64_LIMIT:
1196-
raise RuntimeError(
1197-
'File size unexpectedly exceeded ZIP64 limit')
1198-
if self._compress_size > ZIP64_LIMIT:
1199-
raise RuntimeError(
1200-
'Compressed size unexpectedly exceeded ZIP64 limit')
12011202
# Seek backwards and write file header (which will now include
12021203
# correct CRC and file sizes)
12031204

@@ -1633,8 +1634,9 @@ def _open_to_write(self, zinfo, force_zip64=False):
16331634
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
16341635

16351636
# Compressed size can be larger than uncompressed size
1636-
zip64 = self._allowZip64 and \
1637-
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
1637+
zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
1638+
if not self._allowZip64 and zip64:
1639+
raise LargeZipFile("Filesize would require ZIP64 extensions")
16381640

16391641
if self._seekable:
16401642
self.fp.seek(self.start_dir)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix ``zipfile.Zipfile`` creating invalid zip files when ``force_zip64`` was
2+
used to add files to them. Patch by Carey Metcalfe.

0 commit comments

Comments
 (0)