Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PEX zips now contain directory entries. #1022

Merged
merged 2 commits into from
Aug 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 42 additions & 15 deletions pex/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import threading
import time
import zipfile
from collections import defaultdict
from collections import defaultdict, namedtuple
from contextlib import contextmanager
from datetime import datetime
from uuid import uuid4
Expand Down Expand Up @@ -100,14 +100,17 @@ def teardown(self):
class PermPreservingZipFile(zipfile.ZipFile, object):
"""A ZipFile that works around https://bugs.python.org/issue15795."""

class ZipEntry(namedtuple("ZipEntry", ["info", "data"])):
pass

@classmethod
def zip_info_from_file(cls, filename, arcname=None, date_time=None):
"""Construct a ZipInfo for a file on the filesystem.
def zip_entry_from_file(cls, filename, arcname=None, date_time=None):
"""Construct a ZipEntry for a file on the filesystem.

Usually this is provided directly as a method of ZipInfo, but it is not implemented in Python
2.7 so we re-implement it here. The main divergance we make from the original is adding a
parameter for the datetime (a time.struct_time), which allows us to use a deterministic
timestamp. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495.
Usually a similar `zip_info_from_file` method is provided by `ZipInfo`, but it is not
implemented in Python 2.7 so we re-implement it here to construct the `info` for `ZipEntry`
adding the possibility to control the `ZipInfo` date_time separately from the underlying
file mtime. See https://github.com/python/cpython/blob/master/Lib/zipfile.py#L495.
"""
st = os.stat(filename)
isdir = stat.S_ISDIR(st.st_mode)
Expand All @@ -125,9 +128,14 @@ def zip_info_from_file(cls, filename, arcname=None, date_time=None):
if isdir:
zinfo.file_size = 0
zinfo.external_attr |= 0x10 # MS-DOS directory flag
zinfo.compress_type = zipfile.ZIP_STORED
data = b""
else:
zinfo.file_size = st.st_size
return zinfo
zinfo.compress_type = zipfile.ZIP_DEFLATED
with open(filename, "rb") as fp:
data = fp.read()
return cls.ZipEntry(info=zinfo, data=data)

def _extract_member(self, member, targetpath, pwd):
result = super(PermPreservingZipFile, self)._extract_member(member, targetpath, pwd)
Expand Down Expand Up @@ -525,15 +533,34 @@ def delete(self):

def zip(self, filename, mode="w", deterministic_timestamp=False):
with open_zip(filename, mode) as zf:
for f in sorted(self.files()):
full_path = os.path.join(self.chroot, f)
zinfo = zf.zip_info_from_file(

def write_entry(path):
full_path = os.path.join(self.chroot, path)
zip_entry = zf.zip_entry_from_file(
filename=full_path,
arcname=f,
arcname=path,
date_time=DETERMINISTIC_DATETIME.timetuple()
if deterministic_timestamp
else None,
)
with open(full_path, "rb") as open_f:
data = open_f.read()
zf.writestr(zinfo, data, compress_type=zipfile.ZIP_DEFLATED)
zf.writestr(zip_entry.info, zip_entry.data)

def get_parent_dir(path):
parent_dir = os.path.normpath(os.path.dirname(path))
if parent_dir and parent_dir != os.curdir:
return parent_dir
return None

written_dirs = set()

def maybe_write_parent_dirs(path):
parent_dir = get_parent_dir(path)
if parent_dir is None or parent_dir in written_dirs:
return
maybe_write_parent_dirs(parent_dir)
write_entry(parent_dir)
written_dirs.add(parent_dir)

for f in sorted(self.files()):
maybe_write_parent_dirs(f)
write_entry(f)
7 changes: 7 additions & 0 deletions pex/third_party/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,16 @@ def isolated():

module = "pex"

# TODO(John Sirois): Unify with `pex.util.DistributionHelper.access_zipped_assets`.
def recursive_copy(srcdir, dstdir):
os.mkdir(dstdir)
for entry_name in resource_listdir(module, srcdir):
if not entry_name:
# The `resource_listdir` function returns a '' entry name for the directory
# entry itself if it is either present on the filesystem or present as an
# explicit zip entry. Since we only care about files and subdirectories at this
# point, skip these entries.
continue
# NB: Resource path components are always separated by /, on all systems.
src_entry = "{}/{}".format(srcdir, entry_name) if srcdir else entry_name
dst_entry = os.path.join(dstdir, entry_name)
Expand Down
8 changes: 7 additions & 1 deletion pex/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@ def access_zipped_assets(cls, static_module_name, static_path, dir_location=None
:returns temp_dir: Temporary directory with the zipped assets inside
:rtype: str
"""

# asset_path is initially a module name that's the same as the static_path, but will be
# changed to walk the directory tree
# TODO(John Sirois): Unify with `pex.third_party.isolated(recursive_copy)`.
def walk_zipped_assets(static_module_name, static_path, asset_path, temp_dir):
for asset in resource_listdir(static_module_name, asset_path):
if not asset:
# The `resource_listdir` function returns a '' asset for the directory entry
# itself if it is either present on the filesystem or present as an explicit
# zip entry. Since we only care about files and subdirectories at this point,
# skip these assets.
continue
asset_target = os.path.normpath(
os.path.join(os.path.relpath(asset_path, static_path), asset)
)
Expand Down
18 changes: 18 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
atomic_directory,
can_write_dir,
chmod_plus_x,
open_zip,
temporary_dir,
touch,
)
Expand Down Expand Up @@ -181,6 +182,23 @@ def test_chroot_perms_link_cross_device():
assert_chroot_perms(Chroot.link)


def test_chroot_zip():
with temporary_dir() as tmp:
chroot = Chroot(os.path.join(tmp, "chroot"))
chroot.write(b"data", "directory/subdirectory/file")
zip_dst = os.path.join(tmp, "chroot.zip")
chroot.zip(zip_dst)
with open_zip(zip_dst) as zip:
assert [
"directory/",
"directory/subdirectory/",
"directory/subdirectory/file",
] == sorted(zip.namelist())
assert b"" == zip.read("directory/")
assert b"" == zip.read("directory/subdirectory/")
assert b"data" == zip.read("directory/subdirectory/file")


def test_can_write_dir_writeable_perms():
with temporary_dir() as writeable:
assert can_write_dir(writeable)
Expand Down
38 changes: 33 additions & 5 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,34 @@ def test_pex_source_bundling():
assert stdout == b"hello\n"


def test_pex_source_bundling_pep420():
with temporary_dir() as output_dir:
with temporary_dir() as input_dir:
with safe_open(os.path.join(input_dir, "a/b/c.py"), "w") as fh:
Comment on lines +1134 to +1136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, you can combine these to reduce nesting:

with temporary_dir() as output_dir, temporary_dir() as input_dir():

This is fine to keep - only want to make sure you're aware.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack. I'll stick with the copypasta from the test above for now.

fh.write("GREETING = 'hello'")

with open(os.path.join(input_dir, "exe.py"), "w") as fh:
fh.write(
dedent(
"""
from a.b.c import GREETING

print(GREETING)
"""
)
)

pex_path = os.path.join(output_dir, "pex1.pex")
py36 = ensure_python_interpreter(PY36)
res = run_pex_command(["-o", pex_path, "-D", input_dir, "-e", "exe"], python=py36)
res.assert_success()

stdout, rc = run_simple_pex(pex_path, interpreter=PythonInterpreter.from_binary(py36))

assert rc == 0
assert stdout == b"hello\n"


def test_pex_resource_bundling():
with temporary_dir() as output_dir:
with temporary_dir() as input_dir, temporary_dir() as resources_input_dir:
Expand Down Expand Up @@ -1640,11 +1668,11 @@ def create_pex(path, seed):
zf1.extractall(path=unzipped1)
zf2.extractall(path=unzipped2)
for member1, member2 in zip(sorted(zf1.namelist()), sorted(zf2.namelist())):
assert filecmp.cmp(
os.path.join(unzipped1, member1),
os.path.join(unzipped2, member2),
shallow=False,
)
member1_path = os.path.join(unzipped1, member1)
if os.path.isdir(member1_path):
continue
Comment on lines +1672 to +1673
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for skipping? Are the directory entries not byte-for-byte identical?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's because filecmp.cmp considers anything not a regular file (symlink, directory, etc.) unequal.

member2_path = os.path.join(unzipped2, member2)
assert filecmp.cmp(member1_path, member2_path, shallow=False)
# Then compare the original .pex files. This is the assertion we truly care about.
assert filecmp.cmp(pex1, pex2, shallow=False)

Expand Down