Skip to content

Commit

Permalink
[7.1.0] Collect directory contents in parallel in CompactSpawnLogCont…
Browse files Browse the repository at this point in the history
…ext.

Significantly reduces time spent computing a log entry for large tree artifacts (in a particular execution with ~250k files, the time is reduced from ~72s to ~6s).

This is the same idea as 368bf11, later improved by df07d27.

In theory, the optimization could also be applied to ExpandedSpawnLogContext. However, I don't want to invest resources into improving the old log formats at this time.

Also sort the directory contents alphabetically, fixing an oversight in the previous implementation.

PiperOrigin-RevId: 607126723
Change-Id: I0ebcdbd5e897e25b5b0c28a642e50acae2fd6730
  • Loading branch information
tjgq committed Feb 14, 2024
1 parent b5a9262 commit 1c6c015
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,18 @@ private void initOutputs(CommandEnvironment env) throws IOException {
Path outputBase = env.getOutputBase();

if (executionOptions.executionLogCompactFile != null) {
try {
spawnLogContext =
new CompactSpawnLogContext(
workingDirectory.getRelative(executionOptions.executionLogCompactFile),
env.getExecRoot().asFragment(),
env.getOptions().getOptions(RemoteOptions.class),
env.getRuntime().getFileSystem().getDigestFunction(),
env.getXattrProvider());
} catch (InterruptedException e) {
env.getReporter()
.handle(Event.error("Error while setting up the execution log: " + e.getMessage()));
}
} else {
Path outputPath = null;
Encoding encoding = null;
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/exec/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:runfiles_supplier",
"//src/main/java/com/google/devtools/build/lib/analysis/platform:platform_utils",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/remote/options",
"//src/main/java/com/google/devtools/build/lib/util/io",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.exec;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

import com.github.luben.zstd.ZstdOutputStream;
Expand All @@ -27,6 +28,9 @@
import com.google.devtools.build.lib.actions.Spawns;
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor;
import com.google.devtools.build.lib.concurrent.ErrorClassifier;
import com.google.devtools.build.lib.concurrent.NamedForkJoinPool;
import com.google.devtools.build.lib.exec.Protos.Digest;
import com.google.devtools.build.lib.exec.Protos.ExecLogEntry;
import com.google.devtools.build.lib.exec.Protos.Platform;
Expand All @@ -37,6 +41,7 @@
import com.google.devtools.build.lib.util.io.MessageOutputStream;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.FileSystem;
import com.google.devtools.build.lib.vfs.IORuntimeException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.XattrProvider;
Expand All @@ -45,18 +50,84 @@
import java.io.BufferedOutputStream;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.concurrent.ForkJoinPool;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;

/** A {@link SpawnLogContext} implementation that produces a log in compact format. */
public class CompactSpawnLogContext extends SpawnLogContext {

private static final Comparator<ExecLogEntry.File> EXEC_LOG_ENTRY_FILE_COMPARATOR =
Comparator.comparing(ExecLogEntry.File::getPath);

private static final ForkJoinPool VISITOR_POOL =
NamedForkJoinPool.newNamedPool(
"execlog-directory-visitor", Runtime.getRuntime().availableProcessors());

/** Visitor for use in {@link #visitDirectory}. */
protected interface DirectoryChildVisitor {
void visit(Path path) throws IOException;
}

private static class DirectoryVisitor extends AbstractQueueVisitor {
private final Path rootDir;
private final DirectoryChildVisitor childVisitor;

private DirectoryVisitor(Path rootDir, DirectoryChildVisitor childVisitor) {
super(
VISITOR_POOL,
ExecutorOwnership.SHARED,
ExceptionHandlingMode.FAIL_FAST,
ErrorClassifier.DEFAULT);
this.rootDir = checkNotNull(rootDir);
this.childVisitor = checkNotNull(childVisitor);
}

private void run() throws IOException, InterruptedException {
execute(() -> visitSubdirectory(rootDir));
try {
awaitQuiescence(true);
} catch (IORuntimeException e) {
throw e.getCauseIOException();
}
}

private void visitSubdirectory(Path dir) {
try {
for (Path child : dir.getDirectoryEntries()) {
if (child.isDirectory()) {
execute(() -> visitSubdirectory(child));
continue;
}
childVisitor.visit(child);
}
} catch (IOException e) {
throw new IORuntimeException(e);
}
}
}

/**
* Visits a directory hierarchy in parallel.
*
* <p>Calls {@code childVisitor} for every descendant path of {@code rootDir} that isn't itself a
* directory, following symlinks. The visitor may be concurrently called by multiple threads, and
* must synchronize accesses to shared data.
*/
private void visitDirectory(Path rootDir, DirectoryChildVisitor childVisitor)
throws IOException, InterruptedException {
new DirectoryVisitor(rootDir, childVisitor).run();
}

private interface ExecLogEntrySupplier {
ExecLogEntry.Builder get() throws IOException;
ExecLogEntry.Builder get() throws IOException, InterruptedException;
}

private final PathFragment execRoot;
Expand Down Expand Up @@ -84,7 +155,7 @@ public CompactSpawnLogContext(
@Nullable RemoteOptions remoteOptions,
DigestHashFunction digestHashFunction,
XattrProvider xattrProvider)
throws IOException {
throws IOException, InterruptedException {
this.execRoot = execRoot;
this.remoteOptions = remoteOptions;
this.digestHashFunction = digestHashFunction;
Expand All @@ -101,7 +172,7 @@ private static MessageOutputStream<ExecLogEntry> getOutputStream(Path path) thro
path.toString(), new ZstdOutputStream(new BufferedOutputStream(path.getOutputStream())));
}

private void logInvocation() throws IOException {
private void logInvocation() throws IOException, InterruptedException {
logEntry(
null,
() ->
Expand All @@ -119,7 +190,7 @@ public void logSpawn(
FileSystem fileSystem,
Duration timeout,
SpawnResult result)
throws IOException, ExecException {
throws IOException, InterruptedException, ExecException {
try (SilentCloseable c = Profiler.instance().profile("logSpawn")) {
ExecLogEntry.Spawn.Builder builder = ExecLogEntry.Spawn.newBuilder();

Expand Down Expand Up @@ -187,7 +258,7 @@ public void logSpawn(
*/
private int logInputs(
Spawn spawn, InputMetadataProvider inputMetadataProvider, FileSystem fileSystem)
throws IOException {
throws IOException, InterruptedException {

// Add runfiles and filesets as additional direct members of the top-level nested set of inputs.
// This prevents it from being shared, but experimentally, the top-level input nested set for a
Expand Down Expand Up @@ -229,7 +300,7 @@ private int logInputs(
*/
private int logTools(
Spawn spawn, InputMetadataProvider inputMetadataProvider, FileSystem fileSystem)
throws IOException {
throws IOException, InterruptedException {
return logNestedSet(
spawn.getToolFiles(),
ImmutableList.of(),
Expand All @@ -254,7 +325,7 @@ private int logNestedSet(
InputMetadataProvider inputMetadataProvider,
FileSystem fileSystem,
boolean shared)
throws IOException {
throws IOException, InterruptedException {
if (set.isEmpty() && additionalDirectoryIds.isEmpty()) {
return 0;
}
Expand Down Expand Up @@ -308,7 +379,7 @@ private int logNestedSet(
* @return the entry ID of the {@link ExecLogEntry.File} describing the file.
*/
private int logFile(ActionInput input, Path path, InputMetadataProvider inputMetadataProvider)
throws IOException {
throws IOException, InterruptedException {
checkState(!(input instanceof VirtualActionInput.EmptyActionInput));

return logEntry(
Expand Down Expand Up @@ -347,7 +418,7 @@ private int logFile(ActionInput input, Path path, InputMetadataProvider inputMet
*/
private int logDirectory(
ActionInput input, Path root, InputMetadataProvider inputMetadataProvider)
throws IOException {
throws IOException, InterruptedException {
return logEntry(
input.getExecPathString(),
() ->
Expand Down Expand Up @@ -375,7 +446,7 @@ private int logRunfilesDirectory(
Map<PathFragment, Artifact> mapping,
InputMetadataProvider inputMetadataProvider,
FileSystem fileSystem)
throws IOException {
throws IOException, InterruptedException {
return logEntry(
root.getPathString(),
() -> {
Expand Down Expand Up @@ -423,38 +494,36 @@ private int logRunfilesDirectory(
* @param pathPrefix a prefix to prepend to each child path
* @return the list of files transitively contained in the directory
*/
private ImmutableList<ExecLogEntry.File> expandDirectory(
private List<ExecLogEntry.File> expandDirectory(
Path root, @Nullable String pathPrefix, InputMetadataProvider inputMetadataProvider)
throws IOException {
ImmutableList.Builder<ExecLogEntry.File> builder = ImmutableList.builder();

ArrayDeque<Path> dirs = new ArrayDeque<>();
dirs.add(root);

while (!dirs.isEmpty()) {
Path dir = dirs.removeFirst();
for (Path child : dir.getDirectoryEntries()) {
if (child.isDirectory()) {
dirs.addLast(child);
continue;
}
throws IOException, InterruptedException {
ArrayList<ExecLogEntry.File> files = new ArrayList<>();
visitDirectory(
root,
(child) -> {
String childPath = pathPrefix != null ? pathPrefix + "/" : "";
childPath += child.relativeTo(root).getPathString();

Digest digest =
computeDigest(
/* input= */ null,
child,
inputMetadataProvider,
xattrProvider,
digestHashFunction,
/* includeHashFunctionName= */ false);

ExecLogEntry.File file =
ExecLogEntry.File.newBuilder().setPath(childPath).setDigest(digest).build();

String childPath = pathPrefix != null ? pathPrefix + "/" : "";
childPath += child.relativeTo(root).getPathString();
synchronized (files) {
files.add(file);
}
});

Digest digest =
computeDigest(
/* input= */ null,
child,
inputMetadataProvider,
xattrProvider,
digestHashFunction,
/* includeHashFunctionName= */ false);
Collections.sort(files, EXEC_LOG_ENTRY_FILE_COMPARATOR);

builder.add(ExecLogEntry.File.newBuilder().setPath(childPath).setDigest(digest).build());
}
}
return builder.build();
return files;
}

/**
Expand All @@ -466,7 +535,8 @@ private ImmutableList<ExecLogEntry.File> expandDirectory(
* @return the entry ID of the {@link ExecLogEntry.UnresolvedSymlink} describing the unresolved
* symlink.
*/
private int logUnresolvedSymlink(ActionInput input, Path path) throws IOException {
private int logUnresolvedSymlink(ActionInput input, Path path)
throws IOException, InterruptedException {
return logEntry(
input.getExecPathString(),
() ->
Expand All @@ -489,7 +559,7 @@ private int logUnresolvedSymlink(ActionInput input, Path path) throws IOExceptio
*/
@CanIgnoreReturnValue
private synchronized int logEntry(@Nullable Object key, ExecLogEntrySupplier supplier)
throws IOException {
throws IOException, InterruptedException {
try (SilentCloseable c = Profiler.instance().profile("logEntry/synchronized")) {
if (key == null) {
// No need to check for a previously added entry.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,14 @@ public void testTreeOutput(

actualPath.createDirectoryAndParents();
if (!dirContents.isEmpty()) {
writeFile(actualPath.getChild("child"), "abc");
Path firstChildPath = actualPath.getRelative("dir1/file1");
Path secondChildPath = actualPath.getRelative("dir2/file2");
firstChildPath.getParentDirectory().createDirectoryAndParents();
secondChildPath.getParentDirectory().createDirectoryAndParents();
writeFile(firstChildPath, "abc");
writeFile(secondChildPath, "def");
Path emptySubdirPath = actualPath.getRelative("dir3");
emptySubdirPath.createDirectoryAndParents();
}

Spawn spawn = defaultSpawnBuilder().withOutputs(treeOutput).build();
Expand All @@ -531,8 +538,12 @@ public void testTreeOutput(
? ImmutableList.of()
: ImmutableList.of(
File.newBuilder()
.setPath("out/tree/child")
.setPath("out/tree/dir1/file1")
.setDigest(getDigest("abc"))
.build(),
File.newBuilder()
.setPath("out/tree/dir2/file2")
.setDigest(getDigest("def"))
.build()))
.build());
}
Expand Down

0 comments on commit 1c6c015

Please sign in to comment.