From 4e5e884879ec2017616b498cae6bcaa353e4ba50 Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 19 Mar 2024 06:21:27 -0700 Subject: [PATCH] Fix sandbox cleanup crashing after server restart We try to clean up the sandbox base from previous server instances asynchronously; however, this is sometimes not possible because the old directories are on a different filesystem. This can happen with overlays on Docker after running Bazel in different RUN commands. See https://github.com/bazelbuild/bazel/issues/21719 This change fixes the crash by catching the IOException and falling back to synchronous deletion. Fixes #21719. RELNOTES:none PiperOrigin-RevId: 617150522 Change-Id: I82a07ac0ade66cfb1e5732a90a5f3ab4e2e8caa7 --- .../build/lib/sandbox/SandboxModule.java | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/SandboxModule.java b/src/main/java/com/google/devtools/build/lib/sandbox/SandboxModule.java index adc8ed58c62780..e3e8caf9401081 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/SandboxModule.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/SandboxModule.java @@ -227,27 +227,36 @@ private void setup(CommandEnvironment cmdEnv, SpawnStrategyRegistry.Builder buil // previous builds. However, on the very first build of an instance of the server, we must // wipe old contents to avoid reusing stale directories. if (firstBuild && sandboxBase.exists()) { - if (trashBase.exists()) { - // Delete stale trash from a previous server instance. - Path staleTrash = getStaleTrashDir(trashBase); - trashBase.renameTo(staleTrash); - trashBase.createDirectory(); - treeDeleter.deleteTree(staleTrash); - } else { - trashBase.createDirectory(); - } - // We can delete other dirs asynchronously (if the flag is on). 
- for (Path entry : sandboxBase.getDirectoryEntries()) { - if (entry.getBaseName().equals(AsynchronousTreeDeleter.MOVED_TRASH_DIR)) { - continue; - } - if (entry.getBaseName().equals(SandboxHelpers.INACCESSIBLE_HELPER_DIR)) { - entry.deleteTree(); - } else if (entry.isDirectory()) { - treeDeleter.deleteTree(entry); + try { + if (trashBase.exists()) { + // Delete stale trash from a previous server instance. + Path staleTrash = getStaleTrashDir(trashBase); + trashBase.renameTo(staleTrash); + trashBase.createDirectory(); + treeDeleter.deleteTree(staleTrash); } else { - entry.delete(); + trashBase.createDirectory(); } + // We can delete other dirs asynchronously (if the flag is on). + for (Path entry : sandboxBase.getDirectoryEntries()) { + if (entry.getBaseName().equals(AsynchronousTreeDeleter.MOVED_TRASH_DIR)) { + continue; + } + if (entry.getBaseName().equals(SandboxHelpers.INACCESSIBLE_HELPER_DIR)) { + entry.deleteTree(); + } else if (entry.isDirectory()) { + treeDeleter.deleteTree(entry); + } else { + entry.delete(); + } + } + } catch (IOException e) { + // We have observed asynchronous deletion failing when running Bazel under Docker, see + // #21719. Different RUN commands with `bazel build` will write to different layers in the + // docker image. The overlay filesystem is different and the renaming of the directories + // that we need to do for asynchronous deletion will fail. When that happens we fall back to + // synchronous deletion here. + sandboxBase.deleteTree(); } } firstBuild = false;