Skip to content

Commit

Permalink
[7.0.0] Optimize RepoMappingManifestAction (#20135)
Browse files Browse the repository at this point in the history
The following optimizations reduce the time spent writing the repo
mapping manifest from >6s to <80ms for a test referencing each of ~4,000
repos created by a module extension:

* The set of repository names appearing in runfiles paths is only
constructed once rather than for each repo by moving the `build()` call
out of a closure.
* The relevant entries per repository are now sorted after filtering out
those for repos that don't contribute runfiles.
* Crucially, the relevant mapping entries per repository are now cached
per instance of `RepositoryMapping#entries()`. Since extension repos all
share the same instance due to interning, this reduces the complexity
from quadratic to linear in the number of extension repos.

Closes #20091.

Commit
305ab3b

PiperOrigin-RevId: 581128978
Change-Id: I946e7788b8538e84714cf25ece89a86edd0d6948

Co-authored-by: Fabian Meumertzheim <[email protected]>
  • Loading branch information
bazel-io and fmeum authored Nov 10, 2023
1 parent b2115a7 commit 9e1fc51
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 46 deletions.
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/packages",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:caffeine",
"//third_party:guava",
"//third_party:jsr305",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,16 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -41,6 +46,7 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.IdentityHashMap;
import java.util.Map.Entry;
import java.util.UUID;
import javax.annotation.Nullable;
Expand All @@ -52,19 +58,28 @@ public final class RepoMappingManifestAction extends AbstractFileWriteAction

private static final UUID MY_UUID = UUID.fromString("458e351c-4d30-433d-b927-da6cddd4737f");

private static final LoadingCache<ImmutableMap<String, RepositoryName>, String>
repoMappingFingerprintCache =
Caffeine.newBuilder()
.weakKeys()
.build(
repoMapping -> {
Fingerprint fp = new Fingerprint();
fp.addInt(repoMapping.size());
repoMapping.forEach(
(apparentName, canonicalName) -> {
fp.addString(apparentName);
fp.addString(canonicalName.getName());
});
return fp.hexDigestAndReset();
});

// Uses MapFn's args parameter just like Fingerprint#addString to compute a cacheable fingerprint
// of just the repo name and mapping of a given Package.
private static final MapFn<Package> REPO_AND_MAPPING_DIGEST_FN =
(pkg, args) -> {
args.accept(pkg.getPackageIdentifier().getRepository().getName());

var mapping = pkg.getRepositoryMapping().entries();
args.accept(String.valueOf(mapping.size()));
mapping.forEach(
(apparentName, canonicalName) -> {
args.accept(apparentName);
args.accept(canonicalName.getName());
});
args.accept(repoMappingFingerprintCache.get(pkg.getRepositoryMapping().entries()));
};

private static final MapFn<Artifact> OWNER_REPO_FN =
Expand Down Expand Up @@ -144,67 +159,88 @@ public DeterministicWriter newDeterministicWriter() {
return out -> {
PrintWriter writer = new PrintWriter(out, /* autoFlush= */ false, ISO_8859_1);

var reposInRunfilePaths = ImmutableSet.<String>builder();

var reposInRunfilesPathsBuilder = ImmutableSet.<String>builder();
// The runfiles paths of symlinks are always prefixed with the main workspace name, *not* the
// name of the repository adding the symlink.
if (hasRunfilesSymlinks) {
reposInRunfilePaths.add(RepositoryName.MAIN.getName());
reposInRunfilesPathsBuilder.add(RepositoryName.MAIN.getName());
}

// Since root symlinks are the only way to stage a runfile at a specific path under the
// current repository's runfiles directory, recognize canonical repository names that appear
// as the first segment of their runfiles paths.
for (SymlinkEntry symlink : runfilesRootSymlinks.toList()) {
reposInRunfilePaths.add(symlink.getPath().getSegment(0));
reposInRunfilesPathsBuilder.add(symlink.getPath().getSegment(0));
}

for (Artifact artifact : runfilesArtifacts.toList()) {
Label owner = artifact.getOwner();
if (owner != null) {
reposInRunfilePaths.add(owner.getRepository().getName());
reposInRunfilesPathsBuilder.add(owner.getRepository().getName());
}
}

transitivePackages.toList().stream()
.collect(
toImmutableSortedMap(
comparing(RepositoryName::getName),
pkg -> pkg.getPackageIdentifier().getRepository(),
Package::getRepositoryMapping,
// All packages in a given repository have the same repository mapping, so the
// particular way of resolving duplicates does not matter.
(first, second) -> first))
.forEach(
(repoName, mapping) ->
writeRepoMapping(writer, reposInRunfilePaths.build(), repoName, mapping));
var reposInRunfilesPaths = reposInRunfilesPathsBuilder.build();

ImmutableSortedMap<RepositoryName, RepositoryMapping> sortedRepoMappings =
transitivePackages.toList().stream()
.collect(
toImmutableSortedMap(
comparing(RepositoryName::getName),
pkg -> pkg.getPackageIdentifier().getRepository(),
Package::getRepositoryMapping,
// All packages in a given repository have the same repository mapping, so the
// particular way of resolving duplicates does not matter.
(first, second) -> first));
// All repositories generated by a module extension have the same Map instance as the entries
// of their RepositoryMapping, with every repo appearing as an entry. If a module extension
// generates N repos and all of them are in transitivePackages, iterating over the packages
// and then over each mapping's entries would thus require time quadratic in N. We prevent
// this by caching the relevant (target apparent name, target canonical name) pairs per entry
// map instance.
IdentityHashMap<
ImmutableMap<String, RepositoryName>, ImmutableList<Entry<String, RepositoryName>>>
cachedRelevantEntries = new IdentityHashMap<>();
for (var repoAndMapping : sortedRepoMappings.entrySet()) {
cachedRelevantEntries
.computeIfAbsent(
repoAndMapping.getValue().entries(),
entries -> computeRelevantEntries(reposInRunfilesPaths, entries))
.forEach(
mappingEntry -> {
// The canonical name of the main repo is the empty string, which is not a valid
// name for a directory, so the "workspace name" is used the name of the directory
// under the runfiles tree for it.
String targetRepoDirectoryName =
mappingEntry.getValue().isMain()
? workspaceName
: mappingEntry.getValue().getName();
writer.format(
"%s,%s,%s\n",
repoAndMapping.getKey().getName(),
mappingEntry.getKey(),
targetRepoDirectoryName);
});
}
writer.flush();
};
}

private void writeRepoMapping(
PrintWriter writer,
private ImmutableList<Entry<String, RepositoryName>> computeRelevantEntries(
ImmutableSet<String> reposInRunfilesPaths,
RepositoryName repoName,
RepositoryMapping repoMapping) {
for (Entry<String, RepositoryName> mappingEntry :
ImmutableSortedMap.copyOf(repoMapping.entries()).entrySet()) {
if (mappingEntry.getKey().isEmpty()) {
ImmutableMap<String, RepositoryName> mappingEntries) {
// TODO: If this becomes a hotspot, consider iterating over reposInRunfilesPaths and looking
// up the apparent name in the inverse of mappingEntries, which ensures that the runtime is
// always linear in the number of entries ultimately emitted into the manifest and independent
// of the size of the individual mappings. This requires making RepositoryMapping#entries() an
// ImmutableBiMap, or even ImmutableMultimap since repositories can have multiple apparent
// names.
return mappingEntries.entrySet().stream()
// The apparent repo name can only be empty for the main repo. We skip this line as
// Rlocation paths can't reference an empty apparent name anyway.
continue;
}
if (!reposInRunfilesPaths.contains(mappingEntry.getValue().getName())) {
.filter(mappingEntry -> !mappingEntry.getKey().isEmpty())
// We only write entries for repos whose canonical names appear in runfiles paths.
continue;
}
// The canonical name of the main repo is the empty string, which is not a valid name for a
// directory, so the "workspace name" is used the name of the directory under the runfiles
// tree for it.
String targetRepoDirectoryName =
mappingEntry.getValue().isMain() ? workspaceName : mappingEntry.getValue().getName();
writer.format(
"%s,%s,%s\n", repoName.getName(), mappingEntry.getKey(), targetRepoDirectoryName);
}
.filter(entry -> reposInRunfilesPaths.contains(entry.getValue().getName()))
.sorted(Entry.comparingByKey())
.collect(toImmutableList());
}
}

0 comments on commit 9e1fc51

Please sign in to comment.