Skip to content

Commit

Permalink
Improvements to the Python metadata fetch script (#4780)
Browse files Browse the repository at this point in the history
This fell out of my investigation of
#4774 but the bug was fixed by the
reporter in #4775

- Adds support for `GH_TOKEN` authentication again — basically needed to
avoid rate limits when hacking on this.
- Clarifies some handling and logging of flavors
  • Loading branch information
zanieb committed Jul 3, 2024
1 parent 81442f0 commit c0875fd
Showing 1 changed file with 37 additions and 18 deletions.
55 changes: 37 additions & 18 deletions crates/uv-python/fetch-download-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import json
import logging
import re
import os
import urllib.error
import urllib.request
from itertools import chain
Expand Down Expand Up @@ -65,7 +66,7 @@
$
"""
)
_suffix_re = re.compile(
_flavor_re = re.compile(
r"""(?x)^(.*?)-(%s)$"""
% (
"|".join(
Expand All @@ -91,12 +92,13 @@ def parse_filename(filename):
version, triple = match.groups()
if triple.endswith("-full"):
triple = triple[:-5]
match = _suffix_re.match(triple)
match = _flavor_re.match(triple)
if match is not None:
triple, suffix = match.groups()
triple, flavor = match.groups()
else:
suffix = None
return (version, triple, suffix)
flavor = None

return (version, triple, flavor)


def normalize_triple(triple):
Expand Down Expand Up @@ -132,7 +134,7 @@ def normalize_os(os):

def read_sha256(url):
try:
resp = urllib.request.urlopen(url + ".sha256")
resp = request(url + ".sha256")
except urllib.error.HTTPError:
return None
assert resp.status == 200
Expand All @@ -153,8 +155,9 @@ def sha256(path):
return h.hexdigest()


def _sort_by_flavor_preference(info):
_triple, flavor, _url = info
def _get_flavor_priority(flavor):
"""
Returns the priority of a flavor. Lower is better."""
try:
pref = FLAVOR_PREFERENCES.index(flavor)
except ValueError:
Expand All @@ -167,6 +170,14 @@ def _sort_by_interpreter_and_version(info):
return (interpreter, version_tuple)


def request(url):
request = urllib.request.Request(url)
token = os.getenv("GH_TOKEN")
if token:
request.add_header("Authorization", "Bearer: {token}")
return urllib.request.urlopen(request)


def find():
"""
Find available Python versions and write metadata to a file.
Expand All @@ -176,7 +187,7 @@ def find():
# Collect all available Python downloads
for page in range(1, 100):
logging.debug("Reading release page %s...", page)
resp = urllib.request.urlopen("%s?page=%d" % (RELEASE_URL, page))
resp = request("%s?page=%d" % (RELEASE_URL, page))
rows = json.loads(resp.read())
if not rows:
break
Expand All @@ -194,20 +205,29 @@ def find():
continue
triple = normalize_triple(triple)
if triple is None:
logging.debug("Skipping %s: unsupported triple", url)
continue
results.setdefault(py_ver, []).append((triple, flavor, url))

# Collapse CPython variants to a single URL flavor per triple
cpython_results: dict[tuple[int, int, int], dict[tuple[str, str, str], str]] = {}
for py_ver, choices in results.items():
urls = {}
for triple, flavor, url in sorted(choices, key=_sort_by_flavor_preference):
for triple, flavor, url in choices:
triple = tuple(triple.split("-"))
# Skip existing triples, preferring the first flavor
if triple in urls:
continue
urls[triple] = url
cpython_results[tuple(map(int, py_ver.split(".")))] = urls
priority = _get_flavor_priority(flavor)
existing = urls.get(triple)
if existing:
_, _, existing_priority = existing
# Skip if we have a flavor with higher priority already
if priority >= existing_priority:
continue
urls[triple] = (url, flavor, priority)

# Drop the priorities
cpython_results[tuple(map(int, py_ver.split(".")))] = {
triple: (url, flavor) for triple, (url, flavor, _) in urls.items()
}

# Collect variants across interpreter kinds
# TODO(zanieb): Note we only support CPython downloads at this time
Expand All @@ -223,17 +243,16 @@ def find():
):
# Sort by the remaining information for determinism
# This groups download metadata in triple component order
for (arch, operating_system, libc), url in sorted(choices.items()):
for (arch, operating_system, libc), (url, flavor) in sorted(choices.items()):
key = "%s-%s.%s.%s-%s-%s-%s" % (
interpreter,
*py_ver,
operating_system,
arch,
libc,
)
logging.info("Found %s", key)
logging.info("Found %s (%s)", key, flavor)
sha256 = read_sha256(url)

final_results[key] = {
"name": interpreter,
"arch": arch,
Expand Down

0 comments on commit c0875fd

Please sign in to comment.