From 02d8d518e13786458029c7f332753057b70b340c Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 13 Jan 2025 13:49:58 +0100 Subject: [PATCH 1/3] Remove CompletableFuture from TempLocationManager (cherry picked from commit d0eed456eba6df98614010fbfa90f4f175cb6354) --- .../controller/TempLocationManager.java | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index c749b555cdf..09c0d308628 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -2,6 +2,7 @@ import datadog.trace.api.config.ProfilingConfig; import datadog.trace.bootstrap.config.provider.ConfigProvider; +import datadog.trace.util.AgentTaskScheduler; import datadog.trace.util.PidHelper; import java.io.IOException; import java.nio.file.FileVisitResult; @@ -15,10 +16,8 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,7 +61,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {} } - private class CleanupVisitor implements FileVisitor { + private final class CleanupVisitor implements FileVisitor { private boolean shouldClean; private final Set pidSet = PidHelper.getJavaPids(); @@ -173,12 +172,37 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx } } + private final class CleanupTask implements Runnable { + private final CountDownLatch latch = new CountDownLatch(1); + private volatile Throwable throwable = null; + + @Override + public void run() { + try { + cleanup(false); + } catch (OutOfMemoryError oom) { + throw oom; + } catch (Throwable t) { + throwable = t; + } finally { + latch.countDown(); + } + } + + boolean await(long timeout, TimeUnit unit) throws Throwable { + boolean ret = latch.await(timeout, unit); + if (throwable != null) { + throw throwable; + } + return ret; + } + } + private final Path baseTempDir; private final Path tempDir; private final long cutoffSeconds; - private final CompletableFuture cleanupTask; - + private final CleanupTask cleanupTask = new CleanupTask(); private final CleanupHook cleanupTestHook; /** @@ -200,11 +224,7 @@ public static TempLocationManager getInstance() { static TempLocationManager getInstance(boolean waitForCleanup) { TempLocationManager instance = SingletonHolder.INSTANCE; if (waitForCleanup) { - try { - instance.waitForCleanup(5, TimeUnit.SECONDS); - } catch (TimeoutException ignored) { - - } + instance.waitForCleanup(5, TimeUnit.SECONDS); } return instance; } @@ -256,20 +276,17 @@ private TempLocationManager() { baseTempDir.toFile().deleteOnExit(); tempDir = baseTempDir.resolve("pid_" + pid); - cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); + AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false)); Thread selfCleanup = new Thread( () -> { - try { - waitForCleanup(1, TimeUnit.SECONDS); - } catch (TimeoutException e) { + if (!waitForCleanup(1, TimeUnit.SECONDS)) { log.info( "Cleanup task timed out. {} temp directory might not have been cleaned up properly", tempDir); - } finally { - cleanup(true); } + cleanup(true); }, "Temp Location Manager Cleanup"); Runtime.getRuntime().addShutdownHook(selfCleanup); @@ -359,21 +376,19 @@ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) { } // accessible for tests - void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException { + boolean waitForCleanup(long timeout, TimeUnit unit) { try { - cleanupTask.get(timeout, unit); + return cleanupTask.await(timeout, unit); } catch (InterruptedException e) { - cleanupTask.cancel(true); + log.debug("Temp directory cleanup was interrupted"); Thread.currentThread().interrupt(); - } catch (TimeoutException e) { - cleanupTask.cancel(true); - throw e; - } catch (ExecutionException e) { + } catch (Throwable t) { if (log.isDebugEnabled()) { - log.debug("Failed to cleanup temp directory: {}", tempDir, e); + log.debug("Failed to cleanup temp directory: {}", tempDir, t); } else { log.debug("Failed to cleanup temp directory: {}", tempDir); } } + return false; } } From 6e98a97fcf82b8900d940cfc6491fd48bee1728d Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 13 Jan 2025 17:21:03 +0100 Subject: [PATCH 2/3] Do not run the cleanup if there is nothing to clean up (cherry picked from commit 58d5da854fff4aea03f2b6efe749c2e802d898c6) --- .../controller/TempLocationManager.java | 37 ++++++++++--- .../controller/TempLocationManagerTest.java | 53 +++++++++++++++---- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index 09c0d308628..30d009f69cd 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -18,6 +18,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,6 +30,8 @@ */ public final class TempLocationManager { private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class); + // accessible to tests + static final String TEMPDIR_PREFIX = "pid_"; private static final class SingletonHolder { private static final TempLocationManager INSTANCE = new TempLocationManager(); @@ -104,7 +107,7 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) } String fileName = dir.getFileName().toString(); // the JFR repository directories are under /pid_ - String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null; + String pid = fileName.startsWith(TEMPDIR_PREFIX) ? fileName.substring(4) : null; boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid()); shouldClean |= cleanSelf ? isSelfPid : !isSelfPid && !pidSet.contains(pid); if (shouldClean) { @@ -166,7 +169,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx } String fileName = dir.getFileName().toString(); // reset the flag only if we are done cleaning the top-level directory - shouldClean = !fileName.startsWith("pid_"); + shouldClean = !fileName.startsWith(TEMPDIR_PREFIX); } return FileVisitResult.CONTINUE; } @@ -234,10 +237,11 @@ private TempLocationManager() { } TempLocationManager(ConfigProvider configProvider) { - this(configProvider, CleanupHook.EMPTY); + this(configProvider, true, CleanupHook.EMPTY); } - TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) { + TempLocationManager( + ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) { cleanupTestHook = testHook; // In order to avoid racy attempts to clean up files which are currently being processed in a @@ -275,8 +279,11 @@ private TempLocationManager() { baseTempDir = configuredTempDir.resolve("ddprof"); baseTempDir.toFile().deleteOnExit(); - tempDir = baseTempDir.resolve("pid_" + pid); - AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false)); + tempDir = baseTempDir.resolve(TEMPDIR_PREFIX + pid); + if (runStartupCleanup) { + // do not execute the background cleanup task when running in tests + AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false)); + } Thread selfCleanup = new Thread( @@ -361,6 +368,19 @@ boolean cleanup(boolean cleanSelf) { */ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) { try { + if (!Files.exists(baseTempDir)) { + // not event the main temp location exists; nothing to clean up + return true; + } + try (Stream paths = Files.walk(baseTempDir)) { + if (paths.noneMatch( + path -> + Files.isDirectory(path) + && path.getFileName().toString().startsWith(TEMPDIR_PREFIX))) { + // nothing to clean up; bail out early + return true; + } + } cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit); CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit); Files.walkFileTree(baseTempDir, visitor); @@ -391,4 +411,9 @@ boolean waitForCleanup(long timeout, TimeUnit unit) { } return false; } + + // accessible for tests + void createDirStructure() throws IOException { + Files.createDirectories(baseTempDir); + } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java index fdc839ffaae..1dc3cbbaa9d 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -18,6 +18,7 @@ import java.util.UUID; import java.util.concurrent.Phaser; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.LockSupport; import java.util.stream.Stream; import org.junit.jupiter.api.Test; @@ -173,7 +174,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx } }; - TempLocationManager mgr = instance(baseDir, blocker); + TempLocationManager mgr = instance(baseDir, true, blocker); // wait for the cleanup start phaser.arriveAndAwaitAdvance(); @@ -187,8 +188,9 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx @ParameterizedTest @MethodSource("timeoutTestArguments") - void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception { - long timeoutMs = 500; + void testCleanupWithTimeout(boolean selfCleanup, boolean shouldSucceed, String section) + throws Exception { + long timeoutMs = 10; TempLocationManager.CleanupHook delayer = new TempLocationManager.CleanupHook() { @Override @@ -229,9 +231,39 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) Files.createTempDirectory( "ddprof-test-", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); - TempLocationManager instance = instance(baseDir, delayer); - boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS); - assertTrue(rslt); + TempLocationManager instance = instance(baseDir, false, delayer); + Path mytempdir = instance.getTempDir(); + Path otherTempdir = mytempdir.getParent().resolve("pid_0000"); + Files.createDirectories(otherTempdir); + Files.createFile(mytempdir.resolve("dummy")); + Files.createFile(otherTempdir.resolve("dummy")); + boolean rslt = + instance.cleanup( + selfCleanup, (long) (timeoutMs * (shouldSucceed ? 10 : 0.5d)), TimeUnit.MILLISECONDS); + assertEquals(shouldSucceed, rslt); + } + + @Test + void testShortCircuit() throws Exception { + Path baseDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + AtomicBoolean executed = new AtomicBoolean(); + TempLocationManager.CleanupHook hook = + new TempLocationManager.CleanupHook() { + @Override + public void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) { + executed.set(true); + } + }; + TempLocationManager instance = instance(baseDir, false, hook); + + instance.createDirStructure(); + + boolean ret = instance.cleanup(false); + assertTrue(ret); + assertFalse(executed.get()); } private static Stream timeoutTestArguments() { @@ -239,13 +271,15 @@ private static Stream timeoutTestArguments() { for (boolean selfCleanup : new boolean[] {true, false}) { for (String intercepted : new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) { - argumentsList.add(Arguments.of(selfCleanup, intercepted)); + argumentsList.add(Arguments.of(selfCleanup, true, intercepted)); + argumentsList.add(Arguments.of(selfCleanup, false, intercepted)); } } return argumentsList.stream(); } - private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook) + private TempLocationManager instance( + Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook) throws IOException { Properties props = new Properties(); props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString()); @@ -253,6 +287,7 @@ private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHo ProfilingConfig.PROFILING_UPLOAD_PERIOD, "0"); // to force immediate cleanup; must be a string value! - return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook); + return new TempLocationManager( + ConfigProvider.withPropertiesOverride(props), withStartupCleanup, cleanupHook); } } From 2f6f74e2e3a93e67e44ab9cbd84d4b5608571f80 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Mon, 13 Jan 2025 17:27:45 +0100 Subject: [PATCH 3/3] Exclude JFR repository from temp files self-cleanup (cherry picked from commit a7709116e4463ee93414addd364e9213b7e5df9c) --- .../profiling/controller/TempLocationManager.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java index 30d009f69cd..ec15b1552cd 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -18,6 +18,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,8 +31,9 @@ */ public final class TempLocationManager { private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class); - // accessible to tests - static final String TEMPDIR_PREFIX = "pid_"; + private static final Pattern JFR_DIR_PATTERN = + Pattern.compile("\\d{4}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{6}"); + private static final String TEMPDIR_PREFIX = "pid_"; private static final class SingletonHolder { private static final TempLocationManager INSTANCE = new TempLocationManager(); @@ -100,6 +102,11 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) terminated = true; return FileVisitResult.TERMINATE; } + if (cleanSelf && JFR_DIR_PATTERN.matcher(dir.getFileName().toString()).matches()) { + // do not delete JFR repository on 'self-cleanup' - it conflicts with the JFR's own cleanup + return FileVisitResult.SKIP_SUBTREE; + } + cleanupTestHook.preVisitDirectory(dir, attrs); if (dir.equals(baseTempDir)) {