Skip to content

Commit

Permalink
PEX zips now contain directory entries. (#1022)
Browse files Browse the repository at this point in the history
Previously they did not which led to PEP-420 namespace packages not
working in `--zip-safe` context.

Fixes #1021
  • Loading branch information
jsirois authored Aug 29, 2020
1 parent 026f5e8 commit 51dbf84
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 21 deletions.
57 changes: 42 additions & 15 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import threading
import time
import zipfile
from collections import defaultdict
from collections import defaultdict, namedtuple
from contextlib import contextmanager
from datetime import datetime
from uuid import uuid4
Expand Down Expand Up @@ -100,14 +100,17 @@ def teardown(self):
class PermPreservingZipFile(zipfile.ZipFile, object):
"""A ZipFile that works around https://bugs.python.org/issue15795."""

class ZipEntry(namedtuple("ZipEntry", ["info", "data"])):
pass

@classmethod
def zip_info_from_file(cls, filename, arcname=None, date_time=None):
"""Construct a ZipInfo for a file on the filesystem.
def zip_entry_from_file(cls, filename, arcname=None, date_time=None):
"""Construct a ZipEntry for a file on the filesystem.
Usually this is provided directly as a method of ZipInfo, but it is not implemented in Python
2.7 so we re-implement it here. The main divergance we make from the original is adding a
parameter for the datetime (a time.struct_time), which allows us to use a deterministic
timestamp. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495.
Usually a similar `zip_info_from_file` method is provided by `ZipInfo`, but it is not
implemented in Python 2.7 so we re-implement it here to construct the `info` for `ZipEntry`
adding the possibility to control the `ZipInfo` date_time separately from the underlying
file mtime. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495.
"""
st = os.stat(filename)
isdir = stat.S_ISDIR(st.st_mode)
Expand All @@ -125,9 +128,14 @@ def zip_info_from_file(cls, filename, arcname=None, date_time=None):
if isdir:
zinfo.file_size = 0
zinfo.external_attr |= 0x10 # MS-DOS directory flag
zinfo.compress_type = zipfile.ZIP_STORED
data = b""
else:
zinfo.file_size = st.st_size
return zinfo
zinfo.compress_type = zipfile.ZIP_DEFLATED
with open(filename, "rb") as fp:
data = fp.read()
return cls.ZipEntry(info=zinfo, data=data)

def _extract_member(self, member, targetpath, pwd):
result = super(PermPreservingZipFile, self)._extract_member(member, targetpath, pwd)
Expand Down Expand Up @@ -525,15 +533,34 @@ def delete(self):

def zip(self, filename, mode="w", deterministic_timestamp=False):
with open_zip(filename, mode) as zf:
for f in sorted(self.files()):
full_path = os.path.join(self.chroot, f)
zinfo = zf.zip_info_from_file(

def write_entry(path):
full_path = os.path.join(self.chroot, path)
zip_entry = zf.zip_entry_from_file(
filename=full_path,
arcname=f,
arcname=path,
date_time=DETERMINISTIC_DATETIME.timetuple()
if deterministic_timestamp
else None,
)
with open(full_path, "rb") as open_f:
data = open_f.read()
zf.writestr(zinfo, data, compress_type=zipfile.ZIP_DEFLATED)
zf.writestr(zip_entry.info, zip_entry.data)

def get_parent_dir(path):
parent_dir = os.path.normpath(os.path.dirname(path))
if parent_dir and parent_dir != os.curdir:
return parent_dir
return None

written_dirs = set()

def maybe_write_parent_dirs(path):
parent_dir = get_parent_dir(path)
if parent_dir is None or parent_dir in written_dirs:
return
maybe_write_parent_dirs(parent_dir)
write_entry(parent_dir)
written_dirs.add(parent_dir)

for f in sorted(self.files()):
maybe_write_parent_dirs(f)
write_entry(f)
7 changes: 7 additions & 0 deletions pex/third_party/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,16 @@ def isolated():

module = "pex"

# TODO(John Sirois): Unify with `pex.util.DistributionHelper.access_zipped_assets`.
def recursive_copy(srcdir, dstdir):
os.mkdir(dstdir)
for entry_name in resource_listdir(module, srcdir):
if not entry_name:
# The `resource_listdir` function returns a '' entry name for the directory
# entry itself if it is either present on the filesystem or present as an
# explicit zip entry. Since we only care about files and subdirectories at this
# point, skip these entries.
continue
# NB: Resource path components are always separated by /, on all systems.
src_entry = "{}/{}".format(srcdir, entry_name) if srcdir else entry_name
dst_entry = os.path.join(dstdir, entry_name)
Expand Down
8 changes: 7 additions & 1 deletion pex/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@ def access_zipped_assets(cls, static_module_name, static_path, dir_location=None
:returns temp_dir: Temporary directory with the zipped assets inside
:rtype: str
"""

# asset_path is initially a module name that's the same as the static_path, but will be
# changed to walk the directory tree
# TODO(John Sirois): Unify with `pex.third_party.isolated(recursive_copy)`.
def walk_zipped_assets(static_module_name, static_path, asset_path, temp_dir):
for asset in resource_listdir(static_module_name, asset_path):
if not asset:
# The `resource_listdir` function returns a '' asset for the directory entry
# itself if it is either present on the filesystem or present as an explicit
# zip entry. Since we only care about files and subdirectories at this point,
# skip these assets.
continue
asset_target = os.path.normpath(
os.path.join(os.path.relpath(asset_path, static_path), asset)
)
Expand Down
18 changes: 18 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
atomic_directory,
can_write_dir,
chmod_plus_x,
open_zip,
temporary_dir,
touch,
)
Expand Down Expand Up @@ -181,6 +182,23 @@ def test_chroot_perms_link_cross_device():
assert_chroot_perms(Chroot.link)


def test_chroot_zip():
with temporary_dir() as tmp:
chroot = Chroot(os.path.join(tmp, "chroot"))
chroot.write(b"data", "directory/subdirectory/file")
zip_dst = os.path.join(tmp, "chroot.zip")
chroot.zip(zip_dst)
with open_zip(zip_dst) as zip:
assert [
"directory/",
"directory/subdirectory/",
"directory/subdirectory/file",
] == sorted(zip.namelist())
assert b"" == zip.read("directory/")
assert b"" == zip.read("directory/subdirectory/")
assert b"data" == zip.read("directory/subdirectory/file")


def test_can_write_dir_writeable_perms():
with temporary_dir() as writeable:
assert can_write_dir(writeable)
Expand Down
38 changes: 33 additions & 5 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,34 @@ def test_pex_source_bundling():
assert stdout == b"hello\n"


def test_pex_source_bundling_pep420():
with temporary_dir() as output_dir:
with temporary_dir() as input_dir:
with safe_open(os.path.join(input_dir, "a/b/c.py"), "w") as fh:
fh.write("GREETING = 'hello'")

with open(os.path.join(input_dir, "exe.py"), "w") as fh:
fh.write(
dedent(
"""
from a.b.c import GREETING
print(GREETING)
"""
)
)

pex_path = os.path.join(output_dir, "pex1.pex")
py36 = ensure_python_interpreter(PY36)
res = run_pex_command(["-o", pex_path, "-D", input_dir, "-e", "exe"], python=py36)
res.assert_success()

stdout, rc = run_simple_pex(pex_path, interpreter=PythonInterpreter.from_binary(py36))

assert rc == 0
assert stdout == b"hello\n"


def test_pex_resource_bundling():
with temporary_dir() as output_dir:
with temporary_dir() as input_dir, temporary_dir() as resources_input_dir:
Expand Down Expand Up @@ -1640,11 +1668,11 @@ def create_pex(path, seed):
zf1.extractall(path=unzipped1)
zf2.extractall(path=unzipped2)
for member1, member2 in zip(sorted(zf1.namelist()), sorted(zf2.namelist())):
assert filecmp.cmp(
os.path.join(unzipped1, member1),
os.path.join(unzipped2, member2),
shallow=False,
)
member1_path = os.path.join(unzipped1, member1)
if os.path.isdir(member1_path):
continue
member2_path = os.path.join(unzipped2, member2)
assert filecmp.cmp(member1_path, member2_path, shallow=False)
# Then compare the original .pex files. This is the assertion we truly care about.
assert filecmp.cmp(pex1, pex2, shallow=False)

Expand Down

0 comments on commit 51dbf84

Please sign in to comment.