Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[internal] Add a release artifact size verification step. #13234

Merged
merged 2 commits into from
Oct 15, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 121 additions & 12 deletions build-support/bin/_release_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,24 @@
import xmlrpc.client
from configparser import ConfigParser
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import date
from enum import Enum
from functools import total_ordering
from math import ceil
from pathlib import Path
from tempfile import TemporaryDirectory
from time import sleep
from typing import Callable, Iterable, Iterator, NamedTuple, cast
from typing import Any, Callable, Iterable, Iterator, NamedTuple, Sequence, cast
from urllib.parse import quote_plus
from xml.etree import ElementTree

import requests
from common import banner, die, green
from packaging.version import Version
from reversion import reversion

from pants.util.memo import memoized_property
from pants.util.strutil import strip_prefix

# -----------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -116,13 +122,60 @@ def validate_package_access(self, pkg_name: str) -> None:
print(f"Roles for package {pkg_name} as expected.")


@total_ordering
class PackageVersionType(Enum):
DEV = 0
PRE = 1
STABLE = 2

@classmethod
def from_version(cls, version: Version) -> PackageVersionType:
if version.is_devrelease:
return cls.DEV
elif version.pre:
return cls.PRE
else:
return cls.STABLE

def __lt__(self, other: Any) -> bool:
if not isinstance(other, PackageVersionType):
return NotImplemented
return self.value < other.value


@dataclass(frozen=True)
class PackageVersion:
name: str
version: Version
size_mb: int
most_recent_upload_date: date

@property
def freshness_key(self) -> tuple[PackageVersionType, date, Version]:
"""A sort key of the type, the creation time, and then (although unlikely to be used) the
Version.

Sorts the "stalest" releases first, and the "freshest" releases last.
"""
return (
PackageVersionType.from_version(self.version),
self.most_recent_upload_date,
self.version,
)


@total_ordering
class Package:
def __init__(
self, name: str, target: str, validate: Callable[[str, Path, list[str]], None]
self,
name: str,
target: str,
max_size_mb: int,
validate: Callable[[str, Path, list[str]], None],
) -> None:
self.name = name
self.target = target
self.max_size_mb = max_size_mb
self.validate = validate

def __lt__(self, other):
Expand Down Expand Up @@ -151,17 +204,45 @@ def exists_on_pypi(self) -> bool: # type: ignore[return]
return False
response.raise_for_status()

def latest_published_version(self) -> str:
json_data = requests.get(f"https://pypi.org/pypi/{self.name}/json").json()
return cast(str, json_data["info"]["version"])
@memoized_property
def _json_package_data(self) -> dict[str, Any]:
return cast(dict, requests.get(f"https://pypi.org/pypi/{self.name}/json").json())

def owners(self) -> set[str]:
def can_publish(role: str) -> bool:
return role in {"Owner", "Maintainer"}
def latest_published_version(self) -> str:
return cast(str, self._json_package_data["info"]["version"])

def stale_versions(self) -> Sequence[PackageVersion]:
def pv(version: str, artifacts: list[dict[str, Any]]) -> PackageVersion | None:
upload_dates = [
date.fromisoformat(artifact["upload_time_iso_8601"].split("T")[0])
for artifact in artifacts
if "T" in artifact["upload_time_iso_8601"]
]
size_bytes = sum(int(artifact["size"]) for artifact in artifacts)
size_mb = ceil(size_bytes / 1000000)
if not upload_dates:
return None
return PackageVersion(self.name, Version(version), size_mb, max(upload_dates))

maybe_versions = [
pv(version, artifacts)
for version, artifacts in self._json_package_data["releases"].items()
if artifacts
]
versions_by_freshness_ascending = sorted(
(pv for pv in maybe_versions if pv), key=lambda pv: pv.freshness_key
)
max_artifacts_size_mb = max(pv.size_mb for pv in versions_by_freshness_ascending)

client = xmlrpc.client.ServerProxy("https://pypi.org/pypi")
roles = client.package_roles(self.name)
return {row[1] for row in roles if can_publish(row[0])} # type: ignore[union-attr,index]
# Pop versions from the end of the list (the "freshest") until we reach the threshold
# for size. We leave a little more than the max size of an artifact as buffer space in
# case a new release is particularly large.
available_mb = self.max_size_mb - (max_artifacts_size_mb * 1.1)
while versions_by_freshness_ascending:
if versions_by_freshness_ascending[-1].size_mb > available_mb:
break
available_mb -= versions_by_freshness_ascending.pop().size_mb
return versions_by_freshness_ascending
Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll probably add a safeguard here to confirm that it never suggests deleting more than N versions. But would be interested in any other safeguards folks can think of.

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any safeguard against recommending deleting non-dev releases? I would imagine we want to think 1000 times before deleting recent dev releases, rcs or, of course, stable releases.

Copy link
Sponsor Member Author

@stuhood stuhood Oct 13, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we get into very recent dev releases (it's currently recommending deleting 5+ month old dev releases) things definitely get hairy, yea. Currently it prioritizes removing dev releases before rcs before stable releases.

But perhaps we need a recency safeguard, where we rule out deleting anything younger than X months old? The trouble with that right now is that we will quickly move on to deleting stable releases... admittedly, there are some very old stable releases (pre-1.0.0 basically), which we could manually delete to free up space in the more recent releases to give us more headroom.

Copy link
Member

@kaos kaos Oct 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could add constraints per release kind to that, so instead of time based, you could say
(blacklist vs whitelist simply reverse the constraint...)

stable: <1.30
rc: <2.0
dev: <2.2

As time is less important than release cadence, I think...

Copy link
Sponsor Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed that doing something PackageVersionType-dependent would make more sense, but it would also be more complicated. I'm going to keep this simpler for now, and have added a minimum age to be eligible.



def _pip_args(extra_pip_args: list[str]) -> tuple[str, ...]:
Expand Down Expand Up @@ -245,10 +326,17 @@ def validate_testutil_pkg(version: str, venv_dir: Path, extra_pip_args: list[str

# NB: This a native wheel. We expect a distinct wheel for each Python version and each
# platform (macOS_x86 x macos_arm x linux).
PANTS_PKG = Package("pantsbuild.pants", "src/python/pants:pants-packaged", validate_pants_pkg)
PANTS_PKG = Package(
"pantsbuild.pants",
"src/python/pants:pants-packaged",
# TODO: See https://github.com/pypa/pypi-support/issues/1376.
20000,
validate_pants_pkg,
)
TESTUTIL_PKG = Package(
"pantsbuild.pants.testutil",
"src/python/pants/testutil:testutil_wheel",
20000,
validate_testutil_pkg,
)
PACKAGES = sorted({PANTS_PKG, TESTUTIL_PKG})
Expand Down Expand Up @@ -625,6 +713,7 @@ def publish() -> None:
banner("Releasing to PyPI and GitHub")
# Check prereqs.
check_clean_git_branch()
prompt_artifact_freshness()
check_pgp()
check_roles()

Expand Down Expand Up @@ -792,6 +881,23 @@ def upload_wheels_via_twine() -> None:
)


def prompt_artifact_freshness() -> None:
stale_versions = [
stale_version for package in PACKAGES for stale_version in package.stale_versions()
]
if stale_versions:
print("\n".join(f"Stale:\n {sv}" for sv in stale_versions))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be more useful if you implemented __str__ for the type. The comment you posted was a bit chatty.

input(
"\nTo ensure that there is adequate storage for new artifacts, the stale release "
"artifacts listed above should be deleted via [https://pypi.org/]'s UI.\n"
"If you have any concerns about the listed artifacts, please raise them on Slack "
"or on [https://github.com/pantsbuild/pants/issues/11614].\n"
"Press enter when you have deleted the listed artifacts: "
stuhood marked this conversation as resolved.
Show resolved Hide resolved
)
else:
print("No stale artifacts detected.")


def prompt_apple_silicon() -> None:
input(
f"We need to release for Apple Silicon. Please message Eric on Slack asking to release "
Expand Down Expand Up @@ -947,6 +1053,7 @@ def create_parser() -> argparse.ArgumentParser:
subparsers.add_parser("build-local-pex")
subparsers.add_parser("build-universal-pex")
subparsers.add_parser("validate-roles")
subparsers.add_parser("validate-freshness")
subparsers.add_parser("list-prebuilt-wheels")
subparsers.add_parser("check-pants-wheels")
return parser
Expand All @@ -970,6 +1077,8 @@ def main() -> None:
build_pex(fetch=True)
if args.command == "validate-roles":
PackageAccessValidator.validate_all()
if args.command == "validate-freshness":
prompt_artifact_freshness()
if args.command == "list-prebuilt-wheels":
list_prebuilt_wheels()
if args.command == "check-pants-wheels":
Expand Down