Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ END_UNRELEASED_TEMPLATE

{#v0-0-0-added}
### Added
* (runfiles) The Python runfiles library now supports Bazel's
`--incompatible_compact_repo_mapping_manifest` flag, which uses prefix-based
repository mappings to reduce memory usage for large dependency graphs under
bzlmod. With the flag enabled, the repository mapping manifest can shrink
significantly (from tens of megabytes to far less) with no loss of
functionality.
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
{obj}`main_module` attribute.
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
Expand Down
169 changes: 127 additions & 42 deletions python/runfiles/runfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,126 @@
"""Runfiles lookup library for Bazel-built Python binaries and tests.

See @rules_python//python/runfiles/README.md for usage instructions.

:::{versionadded} VERSION_NEXT_FEATURE
Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added.
This enables prefix-based repository mappings to reduce memory usage for large
dependency graphs under bzlmod.
:::
"""
import collections.abc
from collections import defaultdict
import inspect
import os
import posixpath
import sys
from typing import Dict, Optional, Tuple, Union
from typing import Dict, Iterator, Optional, Tuple, Union


class _RepositoryMapping(collections.abc.Mapping):
    """Repository mapping for resolving apparent repository names to canonical ones.

    Handles both exact mappings and prefix-based mappings introduced by the
    --incompatible_compact_repo_mapping_manifest flag.

    Keys are `(source_canonical, target_apparent)` tuples and values are the
    canonical name of the target repository. Because this class derives from
    `collections.abc.Mapping`, `in` and `.get()` are built on `__getitem__`
    and therefore take prefix-based entries into account as well.
    """

    def __init__(
        self,
        exact_mappings: Dict[Tuple[str, str], str],
        prefixed_mappings: Dict[Tuple[str, str], str],
    ) -> None:
        """Initialize repository mapping with exact and prefixed mappings.

        Args:
            exact_mappings: Dict mapping
                (source_canonical, target_apparent) -> target_canonical
            prefixed_mappings: Dict mapping
                (source_prefix, target_apparent) -> target_canonical, where
                source_prefix has no trailing `*`.
        """
        self._exact_mappings = exact_mappings
        self._prefixed_mappings = prefixed_mappings

        # Group prefixed mappings by target_apparent for faster lookups. The
        # per-target lists keep the insertion order of `prefixed_mappings`,
        # which decides which prefix wins when several match.
        grouped = defaultdict(list)
        for (prefix_source, target_app), target_canonical in prefixed_mappings.items():
            grouped[target_app].append((prefix_source, target_canonical))
        # Freeze into a plain dict so a lookup can never insert an empty group.
        self._grouped_prefixed_mappings = dict(grouped)

    @staticmethod
    def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping":
        """Create RepositoryMapping from a repository mapping manifest file.

        Each non-empty line must have the form
        `source_canonical,target_apparent,target_canonical`. A
        `source_canonical` ending in `*` is stored as a prefix mapping with the
        `*` removed; every other line is stored as an exact mapping.

        Args:
            repo_mapping_path: Path to the repository mapping file, or None if
                not available

        Returns:
            RepositoryMapping instance with parsed mappings

        Raises:
            ValueError: if a non-empty line does not consist of exactly three
                comma-separated fields.
        """
        # If the repository mapping file can't be found, that is not an error: We
        # might be running without Bzlmod enabled or there may not be any runfiles.
        # In this case, just apply empty repo mappings.
        if not repo_mapping_path:
            return _RepositoryMapping({}, {})

        try:
            with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
                content = f.read()
        except FileNotFoundError:
            return _RepositoryMapping({}, {})

        exact_mappings = {}
        prefixed_mappings = {}
        for line in content.splitlines():
            if not line:
                # A blank row carries no mapping; unpacking it would fail.
                continue
            source_canonical, target_apparent, target_canonical = line.split(",")
            if source_canonical.endswith("*"):
                # This is a prefixed mapping - remove the '*' for prefix matching
                prefix = source_canonical[:-1]
                prefixed_mappings[(prefix, target_apparent)] = target_canonical
            else:
                # This is an exact mapping
                exact_mappings[(source_canonical, target_apparent)] = target_canonical

        return _RepositoryMapping(exact_mappings, prefixed_mappings)

    # Mapping protocol implementation
    def __getitem__(self, key: Tuple[str, str]) -> str:
        """Get repository mapping for (source_canonical, target_apparent) key.

        This handles both exact mappings and prefix-based mappings introduced by
        the --incompatible_compact_repo_mapping_manifest flag. Exact mappings are
        tried first, followed by prefix-based mappings, where the first matching
        prefix (in the order the mappings were supplied) wins.

        Args:
            key: Tuple of (source_canonical, target_apparent)

        Returns:
            target_canonical repository name

        Raises:
            KeyError: if no mapping exists for the key. This includes keys that
                are not a 2-tuple or whose source repository is not a string,
                so that `in` and `.get()` treat them as simply absent.
        """
        # Try exact mapping first
        try:
            return self._exact_mappings[key]
        except KeyError:
            pass

        # Prefix matching needs a (str, anything) pair. Anything else cannot
        # match and must surface as KeyError, the only exception the Mapping
        # mixin methods translate into "not present".
        if not (isinstance(key, tuple) and len(key) == 2):
            raise KeyError(key)
        source_repo, target_apparent = key
        if not isinstance(source_repo, str):
            raise KeyError(key)

        # Try prefixed mapping if no exact match found
        candidates = self._grouped_prefixed_mappings.get(target_apparent, ())
        for prefix_source, target_canonical in candidates:
            if source_repo.startswith(prefix_source):
                return target_canonical

        # No mapping found
        raise KeyError(key)

    def __iter__(self) -> Iterator[Tuple[str, str]]:
        """Iterate over all mapping keys (exact first, then prefixed).

        Prefixed keys are yielded as stored, i.e. as (source_prefix,
        target_apparent) without a trailing `*`.
        """
        # First yield all exact mapping keys
        yield from self._exact_mappings
        # Then yield all prefixed mapping keys
        yield from self._prefixed_mappings

    def __len__(self) -> int:
        """Return the total number of mappings (exact + prefixed)."""
        return len(self._exact_mappings) + len(self._prefixed_mappings)


class _ManifestBased:
Expand Down Expand Up @@ -130,7 +244,7 @@ class Runfiles:
def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
self._strategy = strategy
self._python_runfiles_root = _FindPythonRunfilesRoot()
self._repo_mapping = _ParseRepoMapping(
self._repo_mapping = _RepositoryMapping.create_from_file(
strategy.RlocationChecked("_repo_mapping")
)

Expand Down Expand Up @@ -188,25 +302,20 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
# Split off the first path component, which contains the repository
# name (apparent or canonical).
target_repo, _, remainder = path.partition("/")
if not remainder or (source_repo, target_repo) not in self._repo_mapping:
# One of the following is the case:
# - not using Bzlmod, so the repository mapping is empty and
# apparent and canonical repository names are the same
# - target_repo is already a canonical repository name and does not
# have to be mapped.
# - path did not contain a slash and referred to a root symlink,
# which also should not be mapped.
if not remainder:
# path did not contain a slash and referred to a root symlink,
# which should not be mapped.
return self._strategy.RlocationChecked(path)

assert (
source_repo is not None
), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"
# Look up the target repository using the repository mapping
if source_repo is not None:
target_canonical = self._repo_mapping.get((source_repo, target_repo))
if target_canonical is not None:
return self._strategy.RlocationChecked(target_canonical + "/" + remainder)

# target_repo is an apparent repository name. Look up the corresponding
# canonical repository name with respect to the current repository,
# identified by its canonical name.
target_canonical = self._repo_mapping[(source_repo, target_repo)]
return self._strategy.RlocationChecked(target_canonical + "/" + remainder)
# No mapping found - assume target_repo is already canonical or
# we're not using Bzlmod
return self._strategy.RlocationChecked(path)

def EnvVars(self) -> Dict[str, str]:
"""Returns environment variables for subprocesses.
Expand Down Expand Up @@ -359,30 +468,6 @@ def _FindPythonRunfilesRoot() -> str:
return root


def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
    """Parses the repository mapping manifest.

    Each non-empty line of the manifest must have the form
    `current_canonical,target_local,target_canonical`.

    Args:
        repo_mapping_path: Path to the repository mapping file, or None if not
            available.

    Returns:
        Dict mapping (current_canonical, target_local) -> target_canonical.
        Empty if no path was given or the file does not exist.

    Raises:
        ValueError: if a non-empty line does not consist of exactly three
            comma-separated fields.
    """
    # If the repository mapping file can't be found, that is not an error: We
    # might be running without Bzlmod enabled or there may not be any runfiles.
    # In this case, just apply an empty repo mapping.
    if not repo_mapping_path:
        return {}
    try:
        with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
            content = f.read()
    except FileNotFoundError:
        return {}

    repo_mapping = {}
    for line in content.split("\n"):
        if not line:
            # Usually the empty string after the final line break. Skip rather
            # than stop, so a stray blank row cannot silently drop the
            # mappings that follow it.
            continue
        current_canonical, target_local, target_canonical = line.split(",")
        repo_mapping[(current_canonical, target_local)] = target_canonical

    return repo_mapping


def CreateManifestBased(manifest_path: str) -> Runfiles:
    """Module-level shortcut that delegates to `Runfiles.CreateManifestBased`.

    Args:
        manifest_path: passed through unchanged to
            `Runfiles.CreateManifestBased`.

    Returns:
        Whatever `Runfiles.CreateManifestBased` returns for `manifest_path`.
    """
    runfiles = Runfiles.CreateManifestBased(manifest_path)
    return runfiles

Expand Down
Loading