diff --git a/pex/common.py b/pex/common.py index ae60a99f1..bae3370b1 100644 --- a/pex/common.py +++ b/pex/common.py @@ -14,7 +14,7 @@ import threading import time import zipfile -from collections import defaultdict +from collections import defaultdict, namedtuple from contextlib import contextmanager from datetime import datetime from uuid import uuid4 @@ -100,14 +100,17 @@ def teardown(self): class PermPreservingZipFile(zipfile.ZipFile, object): """A ZipFile that works around https://bugs.python.org/issue15795.""" + class ZipEntry(namedtuple("ZipEntry", ["info", "data"])): + pass + @classmethod - def zip_info_from_file(cls, filename, arcname=None, date_time=None): - """Construct a ZipInfo for a file on the filesystem. + def zip_entry_from_file(cls, filename, arcname=None, date_time=None): + """Construct a ZipEntry for a file on the filesystem. - Usually this is provided directly as a method of ZipInfo, but it is not implemented in Python - 2.7 so we re-implement it here. The main divergance we make from the original is adding a - parameter for the datetime (a time.struct_time), which allows us to use a deterministic - timestamp. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495. + Usually a similar `zip_info_from_file` method is provided by `ZipInfo`, but it is not + implemented in Python 2.7 so we re-implement it here to construct the `info` for `ZipEntry` + adding the possibility to control the `ZipInfo` date_time separately from the underlying + file mtime. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495. """ st = os.stat(filename) isdir = stat.S_ISDIR(st.st_mode) @@ -125,9 +128,14 @@ def zip_info_from_file(cls, filename, arcname=None, date_time=None): if isdir: zinfo.file_size = 0 zinfo.external_attr |= 0x10 # MS-DOS directory flag + zinfo.compress_type = zipfile.ZIP_STORED + data = b"" else: zinfo.file_size = st.st_size - return zinfo + zinfo.compress_type = zipfile.ZIP_DEFLATED + with open(filename, "rb") as fp: + data = fp.read() + return cls.ZipEntry(info=zinfo, data=data) def _extract_member(self, member, targetpath, pwd): result = super(PermPreservingZipFile, self)._extract_member(member, targetpath, pwd) @@ -525,15 +533,34 @@ def delete(self): def zip(self, filename, mode="w", deterministic_timestamp=False): with open_zip(filename, mode) as zf: - for f in sorted(self.files()): - full_path = os.path.join(self.chroot, f) - zinfo = zf.zip_info_from_file( + + def write_entry(path): + full_path = os.path.join(self.chroot, path) + zip_entry = zf.zip_entry_from_file( filename=full_path, - arcname=f, + arcname=path, date_time=DETERMINISTIC_DATETIME.timetuple() if deterministic_timestamp else None, ) - with open(full_path, "rb") as open_f: - data = open_f.read() - zf.writestr(zinfo, data, compress_type=zipfile.ZIP_DEFLATED) + zf.writestr(zip_entry.info, zip_entry.data) + + def get_parent_dir(path): + parent_dir = os.path.normpath(os.path.dirname(path)) + if parent_dir and parent_dir != os.curdir: + return parent_dir + return None + + written_dirs = set() + + def maybe_write_parent_dirs(path): + parent_dir = get_parent_dir(path) + if parent_dir is None or parent_dir in written_dirs: + return + maybe_write_parent_dirs(parent_dir) + write_entry(parent_dir) + written_dirs.add(parent_dir) + + for f in sorted(self.files()): + maybe_write_parent_dirs(f) + write_entry(f) diff --git a/pex/util.py b/pex/util.py index e225b2787..babff07d8 100644 --- a/pex/util.py +++ b/pex/util.py @@ -32,11 +32,16 @@ def access_zipped_assets(cls, static_module_name, static_path, dir_location=None :returns temp_dir: Temporary directory with the zipped assets inside :rtype: str """ - # asset_path is initially a module name that's the same as the static_path, but will be # changed to walk the directory tree def walk_zipped_assets(static_module_name, static_path, asset_path, temp_dir): for asset in resource_listdir(static_module_name, asset_path): + if not asset: + # The `resource_listdir` function returns a '' asset for the directory entry + # itself if it is either present on the filesystem or present as an explicit + # zip entry. Since we only care about files and subdirectories at this point, + # skip these assets. + continue asset_target = os.path.normpath( os.path.join(os.path.relpath(asset_path, static_path), asset) ) diff --git a/tests/test_common.py b/tests/test_common.py index e79f9e3fc..a357953ab 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -15,6 +15,7 @@ atomic_directory, can_write_dir, chmod_plus_x, + open_zip, temporary_dir, touch, ) @@ -181,6 +182,20 @@ def test_chroot_perms_link_cross_device(): assert_chroot_perms(Chroot.link) +def test_chroot_zip(): + with temporary_dir() as tmp: + chroot = Chroot(os.path.join(tmp, "chroot")) + chroot.write("data", "directory/subdirectory/file") + zip_dst = os.path.join(tmp, "chroot.zip") + chroot.zip(zip_dst) + with open_zip(zip_dst) as zip: + assert [ + "directory/", + "directory/subdirectory/", + "directory/subdirectory/file", + ] == sorted(zip.namelist()) + + def test_can_write_dir_writeable_perms(): with temporary_dir() as writeable: assert can_write_dir(writeable) diff --git a/tests/test_integration.py b/tests/test_integration.py index c877e932f..a76084961 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1112,6 +1112,33 @@ def test_pex_source_bundling(): assert stdout == b"hello\n" +def test_pex_source_bundling_pep420(): + with temporary_dir() as output_dir: + with temporary_dir() as input_dir: + with safe_open(os.path.join(input_dir, "a/b/c.py"), "w") as fh: + fh.write("GREETING = 'hello'") + + with open(os.path.join(input_dir, "exe.py"), "w") as fh: + fh.write( + dedent( + """ + from a.b.c import GREETING + + print(GREETING) + """ + ) + ) + + pex_path = os.path.join(output_dir, "pex1.pex") + res = run_pex_command(["-o", pex_path, "-D", input_dir, "-e", "exe",]) + res.assert_success() + + stdout, rc = run_simple_pex(pex_path) + + assert rc == 0 + assert stdout == b"hello\n" + + def test_pex_resource_bundling(): with temporary_dir() as output_dir: with temporary_dir() as input_dir, temporary_dir() as resources_input_dir: @@ -1619,11 +1646,11 @@ def create_pex(path, seed): zf1.extractall(path=unzipped1) zf2.extractall(path=unzipped2) for member1, member2 in zip(sorted(zf1.namelist()), sorted(zf2.namelist())): - assert filecmp.cmp( - os.path.join(unzipped1, member1), - os.path.join(unzipped2, member2), - shallow=False, - ) + member1_path = os.path.join(unzipped1, member1) + if os.path.isdir(member1_path): + continue + member2_path = os.path.join(unzipped2, member2) + assert filecmp.cmp(member1_path, member2_path, shallow=False) # Then compare the original .pex files. This is the assertion we truly care about. assert filecmp.cmp(pex1, pex2, shallow=False) diff --git a/tests/test_util.py b/tests/test_util.py index a2d4ef361..05aad28e7 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -119,7 +119,7 @@ def assert_access_zipped_assets(distribution_helper_import): blocking=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) stdout, stderr = process.communicate() - assert process.returncode == 0 + assert process.returncode == 0, stderr assert b"accessed\n" == stdout return stderr