Skip to content

Commit e482e8b

Browse files
oquenchil authored and copybara-github committed
Fix stale trash dir not cleaned up on worker creation
The bug in Blaze is that ${output_base}/blaze-workers/_moved_trash_dir/ doesn't get wiped between Blaze restarts. The directory that trees are moved to for asynchronous deletion is named after an incrementing AtomicInteger, but after a restart the counter starts at zero again. If directories from a previous server instance are still there, we might get an error trying to replace a non-empty directory while renaming. RELNOTES:none PiperOrigin-RevId: 610323627 Change-Id: I2d397fbd4590fa9f83273ac11d92d591bf9348b8
1 parent 9d34f8a commit e482e8b

File tree

5 files changed

+67
-7
lines changed

5 files changed

+67
-7
lines changed

src/main/java/com/google/devtools/build/lib/sandbox/AsynchronousTreeDeleter.java

+4
Original file line numberDiff line numberDiff line change
@@ -110,4 +110,8 @@ public void shutdown() {
110110
service = null;
111111
}
112112
}
113+
114+
public Path getTrashBase() {
115+
return trashBase;
116+
}
113117
}

src/main/java/com/google/devtools/build/lib/worker/BUILD

+1-2
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ java_library(
104104
"//src/main/java/com/google/devtools/build/lib/exec:execution_options",
105105
"//src/main/java/com/google/devtools/build/lib/exec:runfiles_tree_updater",
106106
"//src/main/java/com/google/devtools/build/lib/exec:spawn_strategy_registry",
107-
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
108107
"//src/main/java/com/google/devtools/build/lib/exec/local",
109108
"//src/main/java/com/google/devtools/build/lib/runtime/commands/events",
110109
"//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
@@ -193,7 +192,7 @@ java_library(
193192
":worker_options",
194193
":worker_process_status",
195194
"//src/main/java/com/google/devtools/build/lib/events",
196-
"//src/main/java/com/google/devtools/build/lib/exec:tree_deleter",
195+
"//src/main/java/com/google/devtools/build/lib/sandbox:tree_deleter",
197196
"//src/main/java/com/google/devtools/build/lib/vfs",
198197
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
199198
"//src/main/protobuf:failure_details_java_proto",

src/main/java/com/google/devtools/build/lib/worker/WorkerFactory.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import com.google.common.io.BaseEncoding;
1818
import com.google.devtools.build.lib.events.Event;
1919
import com.google.devtools.build.lib.events.Reporter;
20-
import com.google.devtools.build.lib.exec.TreeDeleter;
20+
import com.google.devtools.build.lib.sandbox.AsynchronousTreeDeleter;
2121
import com.google.devtools.build.lib.server.FailureDetails.Worker.Code;
2222
import com.google.devtools.build.lib.vfs.Path;
2323
import com.google.devtools.build.lib.vfs.PathFragment;
@@ -49,7 +49,7 @@ public class WorkerFactory extends BaseKeyedPooledObjectFactory<WorkerKey, Worke
4949
protected final WorkerOptions workerOptions;
5050

5151
private final Path workerBaseDir;
52-
private final TreeDeleter treeDeleter;
52+
private final AsynchronousTreeDeleter treeDeleter;
5353
private Reporter reporter;
5454

5555
/**
@@ -66,7 +66,7 @@ public WorkerFactory(
6666
Path workerBaseDir,
6767
WorkerOptions workerOptions,
6868
@Nullable WorkerSandboxOptions hardenedSandboxOptions,
69-
@Nullable TreeDeleter treeDeleter) {
69+
@Nullable AsynchronousTreeDeleter treeDeleter) {
7070
this.workerBaseDir = workerBaseDir;
7171
this.workerOptions = workerOptions;
7272
this.hardenedSandboxOptions = hardenedSandboxOptions;
@@ -83,6 +83,10 @@ public Worker create(WorkerKey key) throws IOException {
8383
String workTypeName = key.getWorkerTypeName();
8484
if (!workerBaseDir.isDirectory()) {
8585
workerBaseDir.createDirectoryAndParents();
86+
Path deleterTrashBase = treeDeleter == null ? null : treeDeleter.getTrashBase();
87+
if (deleterTrashBase != null) {
88+
deleterTrashBase.createDirectory();
89+
}
8690
}
8791
Path logFile =
8892
workerBaseDir.getRelative(workTypeName + "-" + workerId + "-" + key.getMnemonic() + ".log");

src/main/java/com/google/devtools/build/lib/worker/WorkerModule.java

+28-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import com.google.devtools.build.lib.exec.ExecutionOptions;
2626
import com.google.devtools.build.lib.exec.RunfilesTreeUpdater;
2727
import com.google.devtools.build.lib.exec.SpawnStrategyRegistry;
28-
import com.google.devtools.build.lib.exec.TreeDeleter;
2928
import com.google.devtools.build.lib.exec.local.LocalEnvProvider;
3029
import com.google.devtools.build.lib.runtime.BlazeModule;
3130
import com.google.devtools.build.lib.runtime.BlazeWorkspace;
@@ -46,10 +45,12 @@
4645

4746
/** A module that adds the WorkerActionContextProvider to the available action context providers. */
4847
public class WorkerModule extends BlazeModule {
48+
49+
private static final String STALE_TRASH = "_stale_trash";
4950
private CommandEnvironment env;
5051

5152
private WorkerFactory workerFactory;
52-
private TreeDeleter treeDeleter;
53+
private AsynchronousTreeDeleter treeDeleter;
5354
@VisibleForTesting WorkerPoolImpl workerPool;
5455
@Nullable private WorkerLifecycleManager workerLifecycleManager;
5556

@@ -114,6 +115,9 @@ public void buildStarting(BuildStartingEvent event) {
114115
Path trashBase = workerDir.getRelative(AsynchronousTreeDeleter.MOVED_TRASH_DIR);
115116
if (treeDeleter == null) {
116117
treeDeleter = new AsynchronousTreeDeleter(trashBase);
118+
if (trashBase.exists()) {
119+
removeStaleTrash(workerDir, trashBase);
120+
}
117121
}
118122
WorkerFactory newWorkerFactory =
119123
new WorkerFactory(workerDir, options, workerSandboxOptions, treeDeleter);
@@ -185,6 +189,28 @@ public void buildStarting(BuildStartingEvent event) {
185189
workerPool.reset();
186190
}
187191

192+
private void removeStaleTrash(Path workerDir, Path trashBase) {
193+
try {
194+
// The AsynchronousTreeDeleter relies on a counter for naming directories that will be
195+
// moved out of the way before being deleted asynchronously.
196+
// If there is trash on disk from a previous bazel server instance, the dirs will have
197+
// names not synced with the counter, therefore we may run the risk of moving a directory
198+
// in this server instance to a path of an existing directory. To solve this we rename
199+
// the trash directory that was on disk, create a new empty trash directory and delete
200+
// the old trash via the AsynchronousTreeDeleter. Before deletion the stale trash will be
201+
// moved to a directory named `0` under MOVED_TRASH_DIR.
202+
Path staleTrash = trashBase.getParentDirectory().getChild(STALE_TRASH);
203+
trashBase.renameTo(staleTrash);
204+
trashBase.createDirectory();
205+
treeDeleter.deleteTree(staleTrash);
206+
} catch (IOException e) {
207+
env.getReporter()
208+
.handle(
209+
Event.error(
210+
String.format("Could not trash dir in '%s': %s", workerDir, e.getMessage())));
211+
}
212+
}
213+
188214
@Override
189215
public void registerSpawnStrategies(
190216
SpawnStrategyRegistry.Builder registryBuilder, CommandEnvironment env) {

src/test/java/com/google/devtools/build/lib/worker/WorkerModuleTest.java

+27
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.google.devtools.build.lib.runtime.BlazeRuntime;
3636
import com.google.devtools.build.lib.runtime.BlazeWorkspace;
3737
import com.google.devtools.build.lib.runtime.CommandEnvironment;
38+
import com.google.devtools.build.lib.sandbox.AsynchronousTreeDeleter;
3839
import com.google.devtools.build.lib.util.AbruptExitException;
3940
import com.google.devtools.build.lib.vfs.DigestHashFunction;
4041
import com.google.devtools.build.lib.vfs.FileSystem;
@@ -262,6 +263,32 @@ public void buildStarting_survivesNoWorkerDir() throws Exception {
262263
assertThrows(IOException.class, () -> module.workerPool.borrowObject(key));
263264
}
264265

266+
@Test
267+
public void buildStarting_cleansStaleTrashDirCleanedOnFirstBuild() throws Exception {
268+
WorkerModule module = new WorkerModule();
269+
WorkerOptions options = WorkerOptions.DEFAULTS;
270+
271+
when(request.getOptions(WorkerOptions.class)).thenReturn(options);
272+
setupEnvironment("/outputRoot");
273+
274+
module.beforeCommand(env);
275+
Path workerDir = fs.getPath("/outputRoot/outputBase/bazel-workers");
276+
Path trashBase = workerDir.getChild(AsynchronousTreeDeleter.MOVED_TRASH_DIR);
277+
trashBase.createDirectoryAndParents();
278+
279+
Path staleTrash = trashBase.getChild("random-trash");
280+
281+
staleTrash.createDirectoryAndParents();
282+
module.buildStarting(BuildStartingEvent.create(env, request));
283+
// Trash is cleaned upon first build.
284+
assertThat(staleTrash.exists()).isFalse();
285+
286+
staleTrash.createDirectoryAndParents();
287+
module.buildStarting(BuildStartingEvent.create(env, request));
288+
// Trash is not cleaned upon subsequent builds.
289+
assertThat(staleTrash.exists()).isTrue();
290+
}
291+
265292
private void setupEnvironment(String rootDir) throws IOException, AbruptExitException {
266293
storedEventHandler = new StoredEventHandler();
267294
Path root = fs.getPath(rootDir);

0 commit comments

Comments
 (0)